/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
This file is part of GCC.
to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 3, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
3, /* vec_unalign_load_cost. */
3, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 2, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
+ 2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
};
was set or cleared on the command line. */
static int ix86_isa_flags_explicit;
-/* Define a set of ISAs which aren't available for a given ISA. MMX
- and SSE ISAs are handled separately. */
+/* Define a set of ISAs which are available when a given ISA is
+ enabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+ as -msse4.2. */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE5_SET \
+ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
+
+/* Define a set of ISAs which aren't available when a given ISA is
+ disabled. MMX and SSE ISAs are handled separately. */
#define OPTION_MASK_ISA_MMX_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_SSE_UNSET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
+ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
- (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
+ (OPTION_MASK_ISA_SSE3 \
+ | OPTION_MASK_ISA_SSSE3_UNSET \
+ | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
- as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4 \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
+/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
+ as -mno-sse4.1. */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
-#define OPTION_MASK_ISA_SSE5_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Implement TARGET_HANDLE_OPTION. */
switch (code)
{
case OPT_mmmx:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
return true;
case OPT_m3dnow:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
return false;
case OPT_msse:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
return true;
case OPT_msse2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
return true;
case OPT_msse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
return true;
case OPT_mssse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
return true;
case OPT_msse4_1:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
return true;
case OPT_msse4_2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
return true;
case OPT_msse4:
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
return true;
case OPT_mno_sse4:
return true;
case OPT_msse4a:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
return true;
case OPT_msse5:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
- /* Turn on SSE4A bultins for -msse5. */
- if (TARGET_SSE5)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
-
- /* Turn on SSE4.1 builtins for -msse4.2. */
- if (TARGET_SSE4_2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
-
- /* Turn on SSSE3 builtins for -msse4.1. */
- if (TARGET_SSE4_1)
- ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
-
- /* Turn on SSE3 builtins for -mssse3. */
- if (TARGET_SSSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE3 builtins for -msse4a. */
- if (TARGET_SSE4A)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE2 builtins for -msse3. */
- if (TARGET_SSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
-
- /* Turn on SSE builtins for -msse2. */
- if (TARGET_SSE2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE;
-
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
x86_prefetch_sse = true;
}
- /* Turn on MMX builtins for 3Dnow. */
- if (TARGET_3DNOW)
- ix86_isa_flags |= OPTION_MASK_ISA_MMX;
-
/* Turn on popcnt instruction for -msse4.2 or -mabm. */
if (TARGET_SSE4_2 || TARGET_ABM)
x86_popcnt = true;
/* Use external vectorized library in vectorizing intrinsics. */
if (ix86_veclibabi_string)
{
- if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ if (strcmp (ix86_veclibabi_string, "svml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_svml;
+ else if (strcmp (ix86_veclibabi_string, "acml") == 0)
ix86_veclib_handler = ix86_veclibabi_acml;
else
error ("unknown vectorization library ABI type (%s) for "
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
+ /* If stack probes are required, the space used for large function
+ arguments on the stack must also be probed, so enable
+ -maccumulate-outgoing-args so this happens in the prologue. */
+ if (TARGET_STACK_PROBE
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "stack probing requires -maccumulate-outgoing-args "
+ "for correctness");
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
/* For sane SSE instruction set generation we need fcomi instruction.
It is safe to enable all CMOVE instructions. */
if (TARGET_SSE)
indirectly or considering a libcall. Otherwise return 0. */
static int
-ix86_function_sseregparm (const_tree type, const_tree decl)
+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
gcc_assert (!TARGET_64BIT);
{
if (!TARGET_SSE)
{
- if (decl)
- error ("Calling %qD with attribute sseregparm without "
- "SSE/SSE2 enabled", decl);
- else
- error ("Calling %qT with attribute sseregparm without "
- "SSE/SSE2 enabled", type);
+ if (warn)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ }
return 0;
}
rtx libname, /* SYMBOL_REF of library name or 0 */
tree fndecl)
{
+ struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
/* Set up the number of registers to use for passing arguments. */
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
cum->warn_mmx = true;
+
+ /* Because type might mismatch in between caller and callee, we need to
+ use actual type of function for local calls.
+ FIXME: cgraph_analyze can be told to actually record if function uses
+ va_start so for local functions maybe_vaarg can be made aggressive
+ helping K&R code.
+ FIXME: once typesytem is fixed, we won't need this code anymore. */
+ if (i && i->local)
+ fntype = TREE_TYPE (fndecl);
cum->maybe_vaarg = (fntype
? (!prototype_p (fntype) || stdarg_p (fntype))
: !libname);
/* Set up the number of SSE registers used for passing SFmode
and DFmode arguments. Warn for mismatching ABI. */
- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
}
}
}
/* for V1xx modes, just use the base mode */
- if (VECTOR_MODE_P (mode)
+ if (VECTOR_MODE_P (mode) && mode != V1DImode
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
mode = GET_MODE_INNER (mode);
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
+ case V1DImode:
case V2SFmode:
case V2SImode:
case V4HImode:
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
cum->mmx_words += words;
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
- if (!TARGET_64BIT)
+ /* Decimal floating point is aligned to its natural boundary. */
+ if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
{
/* i386 ABI defines all arguments to be 4 byte aligned. We have to
make an exception for SSE modes since these require 128bit
align = PARM_BOUNDARY;
}
}
- if (align > 128)
- align = 128;
+ if (align > BIGGEST_ALIGNMENT)
+ align = BIGGEST_ALIGNMENT;
return align;
}
SSE math is enabled or for functions with sseregparm attribute. */
if ((fn || fntype) && (mode == SFmode || mode == DFmode))
{
- int sse_level = ix86_function_sseregparm (fntype, fn);
+ int sse_level = ix86_function_sseregparm (fntype, fn, false);
if ((sse_level >= 1 && mode == SFmode)
|| (sse_level == 2 && mode == DFmode))
regno = FIRST_SSE_REG;
We also may end up assuming that only 64bit values are stored in SSE
register let some floating point program work. */
- if (ix86_preferred_stack_boundary >= 128)
- cfun->stack_alignment_needed = 128;
+ if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
+ cfun->stack_alignment_needed = BIGGEST_ALIGNMENT;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
+/* Not used, but eases macroization of patterns. */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+ rtx input ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
/* Convert an unsigned SImode value into a DFmode. Only currently used
for SSE, but applicable anywhere. */
rtx operands[])
{
rtx mask, set, use, clob, dst, src;
- bool matching_memory;
bool use_sse = false;
bool vector_mode = VECTOR_MODE_P (mode);
enum machine_mode elt_mode = mode;
dst = operands[0];
src = operands[1];
- /* If the destination is memory, and we don't have matching source
- operands or we're using the x87, do things in registers. */
- matching_memory = false;
- if (MEM_P (dst))
- {
- if (use_sse && rtx_equal_p (dst, src))
- matching_memory = true;
- else
- dst = gen_reg_rtx (mode);
- }
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
if (vector_mode)
{
set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
else
emit_insn (set);
}
-
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
}
/* Expand a copysign operation. Special case operand 0 being a constant. */
ix86_expand_branch (code, label);
return;
}
+ break;
case LE: case LEU: case GT: case GTU:
if (lo[1] == constm1_rtx)
{
ix86_expand_branch (code, label);
return;
}
+ break;
default:
break;
}
int
ix86_constant_alignment (tree exp, int align)
{
- if (TREE_CODE (exp) == REAL_CST)
+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
+ || TREE_CODE (exp) == INTEGER_CST)
{
if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
return 64;
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
static const struct builtin_description bdesc_1arg[] =
{
+ /* SSE */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
+ /* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V1DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
tree V2DI_type_node
= build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
tree v4hi_ftype_v4hi_int
= build_function_type_list (V4HI_type_node,
V4HI_type_node, integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
+ tree v2si_ftype_v2si_int
= build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+
tree void_ftype_void
= build_function_type (void_type_node, void_list_node);
tree void_ftype_unsigned
tree v2si_ftype_v2si_v2si
= build_function_type_list (V2SI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
tree di_ftype_di_di_int
= build_function_type_list (long_long_unsigned_type_node,
tree v4si_ftype_v8hi_v8hi
= build_function_type_list (V4SI_type_node,
V8HI_type_node, V8HI_type_node, NULL_TREE);
- tree di_ftype_v8qi_v8qi
- = build_function_type_list (long_long_unsigned_type_node,
+ tree v1di_ftype_v8qi_v8qi
+ = build_function_type_list (V1DI_type_node,
V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree di_ftype_v2si_v2si
- = build_function_type_list (long_long_unsigned_type_node,
+ tree v1di_ftype_v2si_v2si
+ = build_function_type_list (V1DI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
tree v2di_ftype_v16qi_v16qi
= build_function_type_list (V2DI_type_node,
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case DImode:
- type = di_ftype_di_di;
+ case V1DImode:
+ type = v1di_ftype_v1di_v1di;
break;
default:
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
error ("the third argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the third argument must be a 2-bit immediate");
+ return const0_rtx;
+
default:
error ("the third argument must be an 8-bit immediate");
return const0_rtx;
emit_insn (pat);
return target;
+ case IX86_BUILTIN_PSLLW:
+ case IX86_BUILTIN_PSLLWI:
+ icode = CODE_FOR_mmx_ashlv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLD:
+ case IX86_BUILTIN_PSLLDI:
+ icode = CODE_FOR_mmx_ashlv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLQ:
+ case IX86_BUILTIN_PSLLQI:
+ icode = CODE_FOR_mmx_ashlv1di3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAW:
+ case IX86_BUILTIN_PSRAWI:
+ icode = CODE_FOR_mmx_ashrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAD:
+ case IX86_BUILTIN_PSRADI:
+ icode = CODE_FOR_mmx_ashrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLW:
+ case IX86_BUILTIN_PSRLWI:
+ icode = CODE_FOR_mmx_lshrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLD:
+ case IX86_BUILTIN_PSRLDI:
+ icode = CODE_FOR_mmx_lshrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLQ:
+ case IX86_BUILTIN_PSRLQI:
+ icode = CODE_FOR_mmx_lshrv1di3;
+ goto do_pshift;
+
case IX86_BUILTIN_PSLLW128:
case IX86_BUILTIN_PSLLWI128:
icode = CODE_FOR_ashlv8hi3;
return NULL_TREE;
}
-/* Handler for an ACML-style interface to a library with vectorized
- intrinsics. */
+/* Handler for an SVML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20];
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* The SVML is suitable for unsafe math only. */
+ if (!flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG10:
+ case BUILT_IN_POW:
+ case BUILT_IN_TANH:
+ case BUILT_IN_TAN:
+ case BUILT_IN_ATAN:
+ case BUILT_IN_ATAN2:
+ case BUILT_IN_ATANH:
+ case BUILT_IN_CBRT:
+ case BUILT_IN_SINH:
+ case BUILT_IN_SIN:
+ case BUILT_IN_ASINH:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_COSH:
+ case BUILT_IN_COS:
+ case BUILT_IN_ACOSH:
+ case BUILT_IN_ACOS:
+ if (el_mode != DFmode || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_EXPF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG10F:
+ case BUILT_IN_POWF:
+ case BUILT_IN_TANHF:
+ case BUILT_IN_TANF:
+ case BUILT_IN_ATANF:
+ case BUILT_IN_ATAN2F:
+ case BUILT_IN_ATANHF:
+ case BUILT_IN_CBRTF:
+ case BUILT_IN_SINHF:
+ case BUILT_IN_SINF:
+ case BUILT_IN_ASINHF:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_COSHF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_ACOSHF:
+ case BUILT_IN_ACOSF:
+ if (el_mode != SFmode || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+
+ if (fn == BUILT_IN_LOGF)
+ strcpy (name, "vmlsLn4");
+ else if (fn == BUILT_IN_LOG)
+ strcpy (name, "vmldLn2");
+ else if (n == 4)
+ {
+ sprintf (name, "vmls%s", bname+10);
+ name[strlen (name)-1] = '4';
+ }
+ else
+ sprintf (name, "vmld%s2", bname+10);
+
+ /* Convert to uppercase. */
+ name[4] &= ~0x20;
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
+
+/* Handler for an ACML-style interface to
+ a library with vectorized intrinsics. */
static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
where integer modes in MMX/SSE registers are not tieable
because of missing QImode and HImode moves to, from or between
MMX/SSE registers. */
- return MAX (ix86_cost->mmxsse_to_integer, 8);
+ return MAX (8, ix86_cost->mmxsse_to_integer);
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->fp_move;
{
tree type = TREE_TYPE (function);
bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ int nregs;
if (TARGET_64BIT)
{
return gen_rtx_REG (DImode, parm_regs[aggr]);
}
- if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
+ nregs = ix86_function_regparm (type, function);
+
+ if (nregs > 0 && !stdarg_p (type))
{
- int regno = AX_REG;
+ int regno;
+
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
- regno = CX_REG;
+ regno = aggr ? DX_REG : CX_REG;
+ else
+ {
+ regno = AX_REG;
+ if (aggr)
+ {
+ regno = DX_REG;
+ if (nregs == 1)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
+ }
return gen_rtx_REG (SImode, regno);
}
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
n_var++, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;
/* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
- /* x0 = 1./b estimate */
+ /* x0 = rcp(b) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
bool recip)
{
- rtx x0, e0, e1, e2, e3, three, half, zero, mask;
+ rtx x0, e0, e1, e2, e3, mthree, mhalf;
+ REAL_VALUE_TYPE r;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
e2 = gen_reg_rtx (mode);
e3 = gen_reg_rtx (mode);
- three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
- half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
+ real_from_integer (&r, VOIDmode, -3, -1, 0);
+ mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
- mask = gen_reg_rtx (mode);
+ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
+ mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
if (VECTOR_MODE_P (mode))
{
- three = ix86_build_const_vector (SFmode, true, three);
- half = ix86_build_const_vector (SFmode, true, half);
+ mthree = ix86_build_const_vector (SFmode, true, mthree);
+ mhalf = ix86_build_const_vector (SFmode, true, mhalf);
}
- three = force_reg (mode, three);
- half = force_reg (mode, half);
+ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
+ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
- zero = force_reg (mode, CONST0_RTX(mode));
-
- /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
- 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
-
- /* Compare a to zero. */
- emit_insn (gen_rtx_SET (VOIDmode, mask,
- gen_rtx_NE (mode, a, zero)));
-
- /* x0 = 1./sqrt(a) estimate */
+ /* x0 = rsqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT)));
- /* Filter out infinity. */
- if (VECTOR_MODE_P (mode))
- emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
- gen_rtx_AND (mode,
- gen_lowpart (V4SFmode, x0),
- gen_lowpart (V4SFmode, mask))));
- else
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_AND (mode, x0, mask)));
+
+ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
+ if (!recip)
+ {
+ rtx zero, mask;
+
+ zero = gen_reg_rtx (mode);
+ mask = gen_reg_rtx (mode);
+
+ zero = force_reg (mode, CONST0_RTX(mode));
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (VOIDmode, e0,
/* e1 = e0 * x0 */
emit_insn (gen_rtx_SET (VOIDmode, e1,
gen_rtx_MULT (mode, e0, x0)));
- /* e2 = 3. - e1 */
+
+ /* e2 = e1 - 3. */
+ mthree = force_reg (mode, mthree);
emit_insn (gen_rtx_SET (VOIDmode, e2,
- gen_rtx_MINUS (mode, three, e1)));
+ gen_rtx_PLUS (mode, e1, mthree)));
+
+ mhalf = force_reg (mode, mhalf);
if (recip)
- /* e3 = .5 * x0 */
+ /* e3 = -.5 * x0 */
emit_insn (gen_rtx_SET (VOIDmode, e3,
- gen_rtx_MULT (mode, half, x0)));
+ gen_rtx_MULT (mode, x0, mhalf)));
else
- /* e3 = .5 * e0 */
+ /* e3 = -.5 * e0 */
emit_insn (gen_rtx_SET (VOIDmode, e3,
- gen_rtx_MULT (mode, half, e0)));
+ gen_rtx_MULT (mode, e0, mhalf)));
/* ret = e2 * e3 */
emit_insn (gen_rtx_SET (VOIDmode, res,
gen_rtx_MULT (mode, e2, e3)));