GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
{3, 3, 3}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
+ 4, /* size of l1 cache. 486 has 8kB cache
+ shared for code and data, so 4kB is
+ not really precise. */
+ 4, /* size of l2 cache */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache */
32, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
{1, 1, 1}, /* cost of storing SSE registers
in SImode, DImode and TImode */
1, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 128, /* size of l2 cache. */
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
6, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 32, /* size of l2 cache. Some models
+ have integrated l2 cache, but
+ optimizing for k6 is not important
+ enough to worry about that. */
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
64, /* size of prefetch block */
/* New AMD processors never drop prefetches; if they cannot be performed
immediately, they are queued. We set number of simultaneous prefetches
1/1 1/1
MOVD reg32, xmmreg Double FADD 3
1/1 1/1 */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
64, /* size of prefetch block */
/* New AMD processors never drop prefetches; if they cannot be performed
immediately, they are queued. We set number of simultaneous prefetches
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
10, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
{12, 12, 12}, /* cost of storing SSE registers
in SImode, DImode and TImode */
8, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 1024, /* size of l2 cache. */
128, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
{4, 4, 4}, /* cost of storing SSE registers
in SImode, DImode and TImode */
2, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
128, /* size of prefetch block */
8, /* number of parallel prefetches */
3, /* Branch cost */
{8, 8, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
/* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
{8, 8, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
3, /* Branch cost */
\f
static struct machine_function * ix86_init_machine_status (void);
-static rtx ix86_function_value (tree, tree, bool);
-static int ix86_function_regparm (tree, tree);
+static rtx ix86_function_value (const_tree, const_tree, bool);
+static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
+/* Vectorization library interface and handlers. */
+tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
+
/* Implement TARGET_HANDLE_OPTION. */
static bool
{&i386_cost, 4, 3, 4, 3, 4},
{&i486_cost, 16, 15, 16, 15, 16},
{&pentium_cost, 16, 7, 16, 7, 16},
- {&pentiumpro_cost, 16, 15, 16, 7, 16},
+ {&pentiumpro_cost, 16, 15, 16, 10, 16},
{&geode_cost, 0, 0, 0, 0, 0},
{&k6_cost, 32, 7, 32, 7, 32},
{&athlon_cost, 16, 7, 16, 7, 16},
{&pentium4_cost, 0, 0, 0, 0, 0},
{&k8_cost, 16, 7, 16, 7, 16},
{&nocona_cost, 0, 0, 0, 0, 0},
- {&core2_cost, 16, 7, 16, 7, 16},
+ {&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
- {&generic64_cost, 16, 7, 16, 7, 16},
+ {&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32}
};
x86_popcnt = true;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
- if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
+ if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
x86_sahf = true;
break;
if (!TARGET_80387)
target_flags &= ~MASK_FLOAT_RETURNS;
+ /* Use an external vectorized library when vectorizing intrinsics. */
+ if (ix86_veclibabi_string)
+ {
+ if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_acml;
+ else
+ error ("unknown vectorization library ABI type (%s) for "
+ "-mveclibabi= switch", ix86_veclibabi_string);
+ }
+
if ((x86_accumulate_outgoing_args & ix86_tune_mask)
&& !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
ix86_cost->simultaneous_prefetches);
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
+ if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
+ set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
+ if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
+ set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
}
\f
/* Return true if this goes in large data/bss. */
warning to be generated). */
static int
-ix86_comp_type_attributes (tree type1, tree type2)
+ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
/* Check for mismatch of non-default calling convention. */
const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
or considering a libcall. */
static int
-ix86_function_regparm (tree type, tree decl)
+ix86_function_regparm (const_tree type, const_tree decl)
{
tree attr;
int regparm = ix86_regparm;
if (decl && TREE_CODE (decl) == FUNCTION_DECL
&& flag_unit_at_a_time && !profile_flag)
{
- struct cgraph_local_info *i = cgraph_local_info (decl);
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
if (i && i->local)
{
int local_regparm, globals = 0, regno;
indirectly or considering a libcall. Otherwise return 0. */
static int
-ix86_function_sseregparm (tree type, tree decl)
+ix86_function_sseregparm (const_tree type, const_tree decl)
{
gcc_assert (!TARGET_64BIT);
(and DFmode for SSE2) arguments in SSE registers. */
if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
{
- struct cgraph_local_info *i = cgraph_local_info (decl);
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
if (i && i->local)
return TARGET_SSE2 ? 2 : 1;
}
/* Return if we do not know how to pass TYPE solely in registers. */
static bool
-ix86_must_pass_in_stack (enum machine_mode mode, tree type)
+ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
if (must_pass_in_stack_var_size_or_pad (mode, type))
return true;
the middle-end decides to do with these vector types. */
static enum machine_mode
-type_natural_mode (tree type)
+type_natural_mode (const_tree type)
{
enum machine_mode mode = TYPE_MODE (type);
*/
static int
-classify_argument (enum machine_mode mode, tree type,
+classify_argument (enum machine_mode mode, const_tree type,
enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
HOST_WIDE_INT bytes =
/* Examine the argument and return set number of register required in each
class. Return 0 iff parameter should be passed in memory. */
static int
-examine_argument (enum machine_mode mode, tree type, int in_return,
+examine_argument (enum machine_mode mode, const_tree type, int in_return,
int *int_nregs, int *sse_nregs)
{
enum x86_64_reg_class regclass[MAX_CLASSES];
static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
- tree type, int in_return, int nintregs, int nsseregs,
+ const_tree type, int in_return, int nintregs, int nsseregs,
const int *intreg, int sse_regno)
{
/* The following variables hold the static issued_error state. */
static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
- tree type, bool named ATTRIBUTE_UNUSED)
+ const_tree type, bool named ATTRIBUTE_UNUSED)
{
if (TARGET_64BIT_MS_ABI)
{
static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
- tree fntype, tree fn)
+ const_tree fntype, const_tree fn)
{
unsigned int regno;
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
- tree valtype)
+ const_tree valtype)
{
rtx ret;
}
static rtx
-ix86_function_value_1 (tree valtype, tree fntype_or_decl,
+ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
enum machine_mode orig_mode, enum machine_mode mode)
{
- tree fn, fntype;
+ const_tree fn, fntype;
fn = NULL_TREE;
if (fntype_or_decl && DECL_P (fntype_or_decl))
}
static rtx
-ix86_function_value (tree valtype, tree fntype_or_decl,
+ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
bool outgoing ATTRIBUTE_UNUSED)
{
enum machine_mode mode, orig_mode;
/* Return true iff type is returned in memory. */
static int
-return_in_memory_32 (tree type, enum machine_mode mode)
+return_in_memory_32 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size;
}
static int
-return_in_memory_64 (tree type, enum machine_mode mode)
+return_in_memory_64 (const_tree type, enum machine_mode mode)
{
int needed_intregs, needed_sseregs;
return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static int
-return_in_memory_ms_64 (tree type, enum machine_mode mode)
+return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size = int_size_in_bytes (type);
}
int
-ix86_return_in_memory (tree type)
+ix86_return_in_memory (const_tree type)
{
- enum machine_mode mode = type_natural_mode (type);
+ const enum machine_mode mode = type_natural_mode (type);
if (TARGET_64BIT_MS_ABI)
return return_in_memory_ms_64 (type, mode);
are returned in memory, rather than in MMX registers. */
int
-ix86_sol10_return_in_memory (tree type)
+ix86_sol10_return_in_memory (const_tree type)
{
int size;
enum machine_mode mode = type_natural_mode (type);
rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
- int set;
+ alias_set_type set;
int i;
if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
/* Indicate to allocate space on the stack for varargs save area. */
ix86_save_varrargs_registers = 1;
- cfun->stack_alignment_needed = 128;
+ /* We need 16-byte stack alignment to save SSE registers. If the user
+ asked for a lower preferred_stack_boundary, let's just hope that they
+ know what they are doing and won't pass SSE values through varargs.
+
+ We also may end up assuming that only 64-bit values are stored in SSE
+ registers, to let some floating point programs work. */
+ if (ix86_preferred_stack_boundary >= 128)
+ cfun->stack_alignment_needed = 128;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
- int set = get_varargs_alias_set ();
+ alias_set_type set = get_varargs_alias_set ();
int i;
for (i = cum->regno; i < REGPARM_MAX; i++)
if (parts.index && GET_CODE (parts.index) == SUBREG)
parts.index = SUBREG_REG (parts.index);
- /* More complex memory references are better. */
- if (parts.disp && parts.disp != const0_rtx)
- cost--;
- if (parts.seg != SEG_DEFAULT)
- cost--;
-
/* Attempt to minimize number of registers in the address. */
if ((parts.base
&& (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
\f
/* Return a unique alias set for the GOT. */
-static HOST_WIDE_INT
+static alias_set_type
ix86_GOT_alias_set (void)
{
- static HOST_WIDE_INT set = -1;
+ static alias_set_type set = -1;
if (set == -1)
set = new_alias_set ();
return set;
return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
}
- if (flag_pic && SYMBOLIC_CONST (x))
- return legitimize_pic_address (x, 0);
-
if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
{
if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
}
}
+ if (flag_pic && SYMBOLIC_CONST (x))
+ return legitimize_pic_address (x, 0);
+
/* Canonicalize shifts by 0, 1, 2, 3 into multiply */
if (GET_CODE (x) == ASHIFT
&& CONST_INT_P (XEXP (x, 1))
case GTU:
/* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
Those same assemblers have the same but opposite lossage on cmov. */
- gcc_assert (mode == CCmode);
- suffix = fp ? "nbe" : "a";
+ if (mode == CCmode)
+ suffix = fp ? "nbe" : "a";
+ else if (mode == CCCmode)
+ suffix = "b";
+ else
+ gcc_unreachable ();
break;
case LT:
switch (mode)
}
break;
case LTU:
- gcc_assert (mode == CCmode);
+ gcc_assert (mode == CCmode || mode == CCCmode);
suffix = "b";
break;
case GE:
break;
case GEU:
/* ??? As above. */
- gcc_assert (mode == CCmode);
+ gcc_assert (mode == CCmode || mode == CCCmode);
suffix = fp ? "nb" : "ae";
break;
case LE:
suffix = "le";
break;
case LEU:
- gcc_assert (mode == CCmode);
- suffix = "be";
+ /* ??? As above. */
+ if (mode == CCmode)
+ suffix = "be";
+ else if (mode == CCCmode)
+ suffix = fp ? "nb" : "ae";
+ else
+ gcc_unreachable ();
break;
case UNORDERED:
suffix = fp ? "u" : "p";
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
+ + -- print a branch hint as 'cs' or 'ds' prefix
+ ; -- print a semicolon (after prefixes due to bug in older gas).
*/
void
}
return;
}
+
+ case ';':
+#if TARGET_MACHO
+ fputs (" ; ", file);
+#else
+ fputc (' ', file);
+#endif
+ return;
+
default:
output_operand_lossage ("invalid operand code '%c'", code);
}
/* This predicate should match that for movsi_xor and movdi_xor_rex64. */
if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
{
- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
}
op1 = force_reg (Pmode, op1);
else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
{
- rtx reg = no_new_pseudos ? op0 : NULL_RTX;
+ rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
op1 = legitimize_pic_address (op1, reg);
if (op0 == op1)
return;
are moved via xmm registers, and moving them to stack can result in
unaligned memory access. Use ix86_expand_vector_move_misalign()
if memory operand is not aligned correctly. */
- if (!no_new_pseudos
+ if (can_create_pseudo_p ()
&& (mode == TImode) && !TARGET_64BIT
&& ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
|| (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
}
/* Make operand1 a register if it isn't already. */
- if (!no_new_pseudos
+ if (can_create_pseudo_p ()
&& !register_operand (op0, mode)
&& !register_operand (op1, mode))
{
return CCZmode;
/* Codes needing carry flag. */
case GEU: /* CF=0 */
- case GTU: /* CF=0 & ZF=0 */
case LTU: /* CF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == PLUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
+ case GTU: /* CF=0 & ZF=0 */
case LEU: /* CF=1 | ZF=1 */
- return CCmode;
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == MINUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
case GE: /* SF=OF or SF=0 */
/* Try to rearrange the comparison to make it cheaper. */
if (ix86_fp_comparison_cost (code)
> ix86_fp_comparison_cost (swap_condition (code))
- && (REG_P (op1) || !no_new_pseudos))
+ && (REG_P (op1) || can_create_pseudo_p ()))
{
rtx tmp;
tmp = op0, op0 = op1, op1 = tmp;
ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Do fcomi/sahf based test when profitable. */
- if ((TARGET_CMOVE || TARGET_SAHF)
+ if (ix86_fp_comparison_arithmetics_cost (code) > cost
&& (bypass_code == UNKNOWN || bypass_test)
- && (second_code == UNKNOWN || second_test)
- && ix86_fp_comparison_arithmetics_cost (code) > cost)
+ && (second_code == UNKNOWN || second_test))
{
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
if (TARGET_CMOVE)
- {
- tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
- tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
- tmp);
- emit_insn (tmp);
- }
+ emit_insn (tmp);
else
{
- tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
- tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ gcc_assert (TARGET_SAHF);
+
if (!scratch)
scratch = gen_reg_rtx (HImode);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
- emit_insn (gen_x86_sahf_1 (scratch));
+ tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
}
/* The FP codes work out to act like unsigned. */
/* Check whether we will use the natural sequence with one jump. If
so, we can expand jump early. Otherwise delay expansion by
creating compound insn to not confuse optimizers. */
- if (bypass_code == UNKNOWN && second_code == UNKNOWN
- && TARGET_CMOVE)
+ if (bypass_code == UNKNOWN && second_code == UNKNOWN)
{
ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
gen_rtx_LABEL_REF (VOIDmode, label),
vec = rtvec_alloc (3 + !use_fcomi);
RTVEC_ELT (vec, 0) = tmp;
RTVEC_ELT (vec, 1)
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
RTVEC_ELT (vec, 2)
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
if (! use_fcomi)
RTVEC_ELT (vec, 3)
= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
enum machine_mode mode =
GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
- /* Do not handle DImode compares that go through special path.
- Also we can't deal with FP compares yet. This is possible to add. */
+ /* Do not handle DImode compares that go through special path. */
if (mode == (TARGET_64BIT ? TImode : DImode))
return false;
code = swap_condition (code);
}
- /* Try to expand the comparison and verify that we end up with carry flag
- based comparison. This is fails to be true only when we decide to expand
- comparison using arithmetic that is not too common scenario. */
+ /* Try to expand the comparison and verify that we end up with a
+ carry flag based comparison. This fails to be true only when
+ we decide to expand the comparison using arithmetic, which is
+ not a very common scenario. */
start_sequence ();
compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
&second_test, &bypass_test);
if (second_test || bypass_test)
return false;
+
if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
|| GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
else
code = GET_CODE (compare_op);
+
if (code != LTU && code != GEU)
return false;
+
emit_insn (compare_seq);
*pop = compare_op;
return true;
}
+
if (!INTEGRAL_MODE_P (mode))
return false;
+
switch (code)
{
case LTU:
/* Swapping operands may cause constant to appear as first operand. */
if (!nonimmediate_operand (op0, VOIDmode))
{
- if (no_new_pseudos)
+ if (!can_create_pseudo_p ())
return false;
op0 = force_reg (mode, op0);
}
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
- cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
+ cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
if (TARGET_64BIT)
emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
else
IX86_BUILTIN_VEC_SET_V4HI,
IX86_BUILTIN_VEC_SET_V16QI,
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
IX86_BUILTIN_CRC32HI,
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
= build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
tree v4si_ftype_v2df
= build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4si_ftype_v2df_v2df
+ = build_function_type_list (V4SI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
tree v2si_ftype_v2df
= build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
tree v4sf_ftype_v2df
ftype = build_function_type_list (float128_type_node,
float128_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
+ def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
ftype = build_function_type_list (float128_type_node,
float128_type_node,
float128_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
+ def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
}
/* Add all SSE builtins that are more or less simple operations on
break;
}
- def_builtin (d->mask, d->name, type, d->code);
+ def_builtin_const (d->mask, d->name, type, d->code);
}
/* Add all builtins that are more or less simple operations on two
|| d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
type = v2di_ftype_v2df_v2df;
- def_builtin (d->mask, d->name, type, d->code);
+ if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
+ type = v4si_ftype_v2df_v2df;
+
+ def_builtin_const (d->mask, d->name, type, d->code);
}
/* Add all builtins that are more or less simple operations on 1 operand. */
abort ();
}
- def_builtin (d->mask, d->name, type, d->code);
+ def_builtin_const (d->mask, d->name, type, d->code);
}
/* pcmpestr[im] insns. */
ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
else
ftype = int_ftype_v16qi_int_v16qi_int_int;
- def_builtin (d->mask, d->name, ftype, d->code);
+ def_builtin_const (d->mask, d->name, ftype, d->code);
}
/* pcmpistr[im] insns. */
ftype = v16qi_ftype_v16qi_v16qi_int;
else
ftype = int_ftype_v16qi_v16qi_int;
- def_builtin (d->mask, d->name, ftype, d->code);
+ def_builtin_const (d->mask, d->name, ftype, d->code);
}
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
/* comi/ucomi insns. */
for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
if (d->mask == OPTION_MASK_ISA_SSE2)
- def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
else
- def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+ def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
/* ptest insns. */
for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
+ def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
ftype = build_function_type_list (float_type_node,
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
/* Original 3DNow! */
def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
- def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
/* 3DNow! extension as used in the Athlon CPU. */
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
- def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+ def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
/* SSE2 */
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
/* Prescott New Instructions. */
def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
/* SSSE3. */
- def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
- def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
+ def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
+ def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
/* SSE4.1. */
def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
unsigned_type_node,
unsigned_char_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
ftype = build_function_type_list (unsigned_type_node,
unsigned_type_node,
short_unsigned_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
ftype = build_function_type_list (unsigned_type_node,
unsigned_type_node,
unsigned_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
ftype = build_function_type_list (long_long_unsigned_type_node,
long_long_unsigned_type_node,
long_long_unsigned_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
/* AMDFAM10 SSE4A New built-ins */
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
- def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+ def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
+ def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
short_integer_type_node,
short_integer_type_node,
short_integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
ftype = build_function_type_list (V8QI_type_node, char_type_node,
char_type_node, char_type_node,
char_type_node, char_type_node,
char_type_node, char_type_node,
char_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
/* Access to the vec_extract patterns. */
ftype = build_function_type_list (double_type_node, V2DF_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
ftype = build_function_type_list (long_long_integer_type_node,
V2DI_type_node, integer_type_node,
NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
ftype = build_function_type_list (float_type_node, V4SF_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
/* Access to the vec_set patterns. */
ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
intDI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
float_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
intSI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
intHI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
intHI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
intQI_type_node,
integer_type_node, NULL_TREE);
- def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
}
static void
op1 = gen_lowpart (TImode, x);
}
- /* The insn must want input operands in the same modes as the
- result. */
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-
if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
if (out_mode == DFmode && out_n == 2
&& in_mode == DFmode && in_n == 2)
return ix86_builtins[IX86_BUILTIN_SQRTPD];
- return NULL_TREE;
+ break;
case BUILT_IN_SQRTF:
if (out_mode == SFmode && out_n == 4
&& in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_SQRTPS];
- return NULL_TREE;
+ break;
+
+ case BUILT_IN_LRINT:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ break;
case BUILT_IN_LRINTF:
if (out_mode == SImode && out_n == 4
&& in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
- return NULL_TREE;
+ break;
default:
;
}
+ /* Dispatch to a handler for a vectorization library. */
+ if (ix86_veclib_handler)
+ return (*ix86_veclib_handler)(fn, type_out, type_in);
+
return NULL_TREE;
}
+/* Vectorization-library handler for an ACML-style interface.
+
+   FN is the scalar math builtin being vectorized; TYPE_OUT and TYPE_IN are
+   the vector result and argument types.  On success an external
+   FUNCTION_DECL is built whose name is "__vrd2_" (two doubles) or
+   "__vrs4_" (four floats) followed by the builtin's name with its first
+   ten characters dropped (presumably the "__builtin_" prefix).  NULL_TREE
+   is returned when no suitable routine exists.  */
+
+static tree
+ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
+{
+  char name[20] = "__vr.._";
+  tree fntype, new_fndecl, args;
+  unsigned arity = 0;
+  const char *bname;
+  enum machine_mode el_mode, in_mode, want_mode;
+  int n, in_n, want_n;
+  char type_ch, count_ch;
+
+  /* The ACML is 64bits only and suitable for unsafe math only as
+     it does not correctly support parts of IEEE with the required
+     precision such as denormals.  */
+  if (!TARGET_64BIT || !flag_unsafe_math_optimizations)
+    return NULL_TREE;
+
+  /* Result and argument vectors must agree in element mode and length.  */
+  el_mode = TYPE_MODE (TREE_TYPE (type_out));
+  n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  if (!(el_mode == in_mode && n == in_n))
+    return NULL_TREE;
+
+  /* Pick the name infix and the vector shape the routine requires.  */
+  switch (fn)
+    {
+    case BUILT_IN_SIN:
+    case BUILT_IN_COS:
+    case BUILT_IN_EXP:
+    case BUILT_IN_LOG:
+    case BUILT_IN_LOG2:
+    case BUILT_IN_LOG10:
+      type_ch = 'd';
+      count_ch = '2';
+      want_mode = DFmode;
+      want_n = 2;
+      break;
+
+    case BUILT_IN_SINF:
+    case BUILT_IN_COSF:
+    case BUILT_IN_EXPF:
+    case BUILT_IN_POWF:
+    case BUILT_IN_LOGF:
+    case BUILT_IN_LOG2F:
+    case BUILT_IN_LOG10F:
+      type_ch = 's';
+      count_ch = '4';
+      want_mode = SFmode;
+      want_n = 4;
+      break;
+
+    default:
+      return NULL_TREE;
+    }
+
+  if (el_mode != want_mode || n != want_n)
+    return NULL_TREE;
+
+  /* Fill in the ".." placeholder of the name template, then append the
+     builtin's name past its first ten characters.  */
+  name[4] = type_ch;
+  name[5] = count_ch;
+  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+  sprintf (name + 7, "%s", bname + 10);
+
+  /* Count the arguments recorded on the scalar builtin's decl.  */
+  args = DECL_ARGUMENTS (implicit_built_in_decls[fn]);
+  while (args)
+    {
+      arity++;
+      args = TREE_CHAIN (args);
+    }
+
+  fntype = (arity == 1
+            ? build_function_type_list (type_out, type_in, NULL)
+            : build_function_type_list (type_out, type_in, type_in, NULL));
+
+  /* Build a function declaration for the vectorized function.  */
+  new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+  TREE_PUBLIC (new_fndecl) = 1;
+  DECL_EXTERNAL (new_fndecl) = 1;
+  DECL_IS_NOVOPS (new_fndecl) = 1;
+  TREE_READONLY (new_fndecl) = 1;
+
+  return new_fndecl;
+}
+
+
/* Returns a decl of a function that implements conversion of the
input vector of type TYPE, or NULL_TREE if it is not available. */
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers.
+ To optimize register_move_cost performance, allow inline variant.
+
The macro can't work reliably when one of the CLASSES is class containing
registers from multiple units (SSE, MMX, integer). We avoid this by never
combining those units in single alternative in the machine description.
When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
enforce these sanity checks. */
-int
-ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+static inline int
+inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
enum machine_mode mode, int strict)
{
if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
return false;
}
+/* Non-inline entry point: forward CLASS1, CLASS2, MODE and STRICT to
+   inline_secondary_memory_needed and return its result.  */
+int
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+                              enum machine_mode mode, int strict)
+{
+  int needed = inline_secondary_memory_needed (class1, class2, mode, strict);
+
+  return needed;
+}
+
/* Return true if the registers in CLASS cannot represent the change from
modes FROM to TO. */
return false;
}
+/* Return the cost of moving data of mode MODE between a register of
+   class REGCLASS and memory.  A value of 2 is the default; this cost is
+   relative to those in `REGISTER_MOVE_COST'.
+
+   IN selects which cost is wanted:
+     0 - the store cost,
+     1 - the load cost,
+     2 - the maximum of the load and store costs.
+
+   This function is used extensively by register_move_cost that is used to
+   build tables at startup.  Make it inline in this case.
+
+   If moving between registers and memory is more expensive than
+   between two registers, you should define this macro to express the
+   relative cost.
+
+   For the x87, SSE and MMX classes, a mode with no entry in the cost
+   tables yields 100.
+
+   Model also increased moving costs of QImode registers in non
+   Q_REGS classes.  */
+static inline int
+inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
+                         int in)
+{
+  int cost;
+
+  if (FLOAT_CLASS_P (regclass))
+    {
+      int index;
+      switch (mode)
+        {
+        case SFmode:
+          index = 0;
+          break;
+        case DFmode:
+          index = 1;
+          break;
+        case XFmode:
+          index = 2;
+          break;
+        default:
+          return 100;
+        }
+      if (in == 2)
+        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
+      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+    }
+  if (SSE_CLASS_P (regclass))
+    {
+      int index;
+      switch (GET_MODE_SIZE (mode))
+        {
+        case 4:
+          index = 0;
+          break;
+        case 8:
+          index = 1;
+          break;
+        case 16:
+          index = 2;
+          break;
+        default:
+          return 100;
+        }
+      if (in == 2)
+        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
+      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+    }
+  if (MMX_CLASS_P (regclass))
+    {
+      int index;
+      switch (GET_MODE_SIZE (mode))
+        {
+        case 4:
+          index = 0;
+          break;
+        case 8:
+          index = 1;
+          break;
+        default:
+          return 100;
+        }
+      /* Only IN == 2 asks for the maximum; testing plain IN here would
+         make a load request (IN == 1) return the maximum as well.  */
+      if (in == 2)
+        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
+      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+    }
+  switch (GET_MODE_SIZE (mode))
+    {
+    case 1:
+      if (Q_CLASS_P (regclass) || TARGET_64BIT)
+        {
+          if (!in)
+            return ix86_cost->int_store[0];
+          if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
+            cost = ix86_cost->movzbl_load;
+          else
+            cost = ix86_cost->int_load[0];
+          if (in == 2)
+            return MAX (cost, ix86_cost->int_store[0]);
+          return cost;
+        }
+      else
+        {
+          /* Byte access through a non-Q class: loads use the movzbl cost
+             and stores carry an extra penalty of 4.  */
+          if (in == 2)
+            return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
+          if (in)
+            return ix86_cost->movzbl_load;
+          else
+            return ix86_cost->int_store[0] + 4;
+        }
+      break;
+    case 2:
+      if (in == 2)
+        return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
+      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+    default:
+      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
+      if (mode == TFmode)
+        mode = XFmode;
+      if (in == 2)
+        cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
+      else if (in)
+        cost = ix86_cost->int_load[2];
+      else
+        cost = ix86_cost->int_store[2];
+      /* Scale the per-move cost by the size of MODE rounded up to whole
+         UNITS_PER_WORD units.  */
+      return (cost * (((int) GET_MODE_SIZE (mode)
+                       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+    }
+}
+
+/* Non-inline entry point: forward MODE, REGCLASS and IN to
+   inline_memory_move_cost and return its result.  */
+int
+ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
+{
+  int cost = inline_memory_move_cost (mode, regclass, in);
+
+  return cost;
+}
+
+
/* Return the cost of moving data from a register in class CLASS1 to
one in class CLASS2.
by load. In order to avoid bad register allocation choices, we need
for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
- if (ix86_secondary_memory_needed (class1, class2, mode, 0))
+ if (inline_secondary_memory_needed (class1, class2, mode, 0))
{
int cost = 1;
- cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
- MEMORY_MOVE_COST (mode, class1, 1));
- cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
- MEMORY_MOVE_COST (mode, class2, 1));
+ cost += inline_memory_move_cost (mode, class1, 2);
+ cost += inline_memory_move_cost (mode, class2, 2);
/* In case of copying from general_purpose_register we may emit multiple
stores followed by single load causing memory size mismatch stall.
/* Moves between SSE/MMX and integer unit are expensive. */
if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
|| SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
- return ix86_cost->mmxsse_to_integer;
+
+ /* ??? By keeping returned value relatively high, we limit the number
+ of moves between integer and MMX/SSE registers for all targets.
+ Additionally, high value prevents problem with x86_modes_tieable_p(),
+ where integer modes in MMX/SSE registers are not tieable
+ because of missing QImode and HImode moves to, from or between
+ MMX/SSE registers. */
+ return MAX (ix86_cost->mmxsse_to_integer, 8);
+
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->fp_move;
if (MAYBE_SSE_CLASS_P (class1))
return 1;
else if (VALID_FP_MODE_P (mode))
return 1;
+ else if (VALID_DFP_MODE_P (mode))
+ return 1;
/* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
on to use that value in smaller contexts, this can easily force a
pseudo to be allocated to GENERAL_REGS. Since this is no worse than
return false;
}
-/* Return the cost of moving data of mode M between a
- register and memory. A value of 2 is the default; this cost is
- relative to those in `REGISTER_MOVE_COST'.
-
- If moving between registers and memory is more expensive than
- between two registers, you should define this macro to express the
- relative cost.
-
- Model also increased moving costs of QImode registers in non
- Q_REGS classes.
- */
-int
-ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
-{
- if (FLOAT_CLASS_P (regclass))
- {
- int index;
- switch (mode)
- {
- case SFmode:
- index = 0;
- break;
- case DFmode:
- index = 1;
- break;
- case XFmode:
- index = 2;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
- }
- if (SSE_CLASS_P (regclass))
- {
- int index;
- switch (GET_MODE_SIZE (mode))
- {
- case 4:
- index = 0;
- break;
- case 8:
- index = 1;
- break;
- case 16:
- index = 2;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
- }
- if (MMX_CLASS_P (regclass))
- {
- int index;
- switch (GET_MODE_SIZE (mode))
- {
- case 4:
- index = 0;
- break;
- case 8:
- index = 1;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
- }
- switch (GET_MODE_SIZE (mode))
- {
- case 1:
- if (in)
- return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
- : ix86_cost->movzbl_load);
- else
- return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
- : ix86_cost->int_store[0] + 4);
- break;
- case 2:
- return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
- default:
- /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
- if (mode == TFmode)
- mode = XFmode;
- return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
- * (((int) GET_MODE_SIZE (mode)
- + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
- }
-}
-
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
}
static bool
-ix86_ms_bitfield_layout_p (tree record_type)
+ix86_ms_bitfield_layout_p (const_tree record_type)
{
return (TARGET_MS_BITFIELD_LAYOUT &&
!lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
/* Determine whether x86_output_mi_thunk can succeed. */
static bool
-x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
+x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
- HOST_WIDE_INT vcall_offset, tree function)
+ HOST_WIDE_INT vcall_offset, const_tree function)
{
/* 64-bit can handle anything. */
if (TARGET_64BIT)
return false;
}
+/* Target hook for c_mode_for_suffix.  Map the suffix character SUFFIX to
+   a machine mode: 'q' yields TFmode when TARGET_64BIT holds, 'w' yields
+   XFmode when TARGET_MMX holds, and everything else yields VOIDmode.  */
+static enum machine_mode
+ix86_c_mode_for_suffix (char suffix)
+{
+  switch (suffix)
+    {
+    case 'q':
+      return TARGET_64BIT ? TFmode : VOIDmode;
+
+    case 'w':
+      return TARGET_MMX ? XFmode : VOIDmode;
+
+    default:
+      return VOIDmode;
+    }
+}
+
/* Worker function for TARGET_MD_ASM_CLOBBERS.
We do this in the new i386 backend to maintain source compatibility
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
bool recip)
{
- rtx x0, e0, e1, e2, e3, three, half, bignum;
+ rtx x0, e0, e1, e2, e3, three, half, zero, mask;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
- bignum = gen_lowpart (SFmode, GEN_INT (0x7f7fffff));
+
+ mask = gen_reg_rtx (mode);
if (VECTOR_MODE_P (mode))
{
three = ix86_build_const_vector (SFmode, true, three);
half = ix86_build_const_vector (SFmode, true, half);
- bignum = ix86_build_const_vector (SFmode, true, bignum);
}
three = force_reg (mode, three);
half = force_reg (mode, half);
- bignum = force_reg (mode, bignum);
+
+ zero = force_reg (mode, CONST0_RTX(mode));
/* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
+ /* Compare a to zero. */
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, a, zero)));
+
/* x0 = 1./sqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT)));
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_SMIN (mode, x0, bignum)));
-
+ /* Filter out infinity. */
+ if (VECTOR_MODE_P (mode))
+ emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
+ gen_rtx_AND (mode,
+ gen_lowpart (V4SFmode, x0),
+ gen_lowpart (V4SFmode, mask))));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (VOIDmode, e0,
gen_rtx_MULT (mode, x0, a)));
/* Return the mangling of TYPE if it is an extended fundamental type. */
static const char *
-ix86_mangle_fundamental_type (tree type)
+ix86_mangle_type (const_tree type)
{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
switch (TYPE_MODE (type))
{
case TFmode:
/* load nextafter (0.5, 0.0) */
fmt = REAL_MODE_FORMAT (mode);
- real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
/* adj = copysign (0.5, op1) */
/* load nextafter (0.5, 0.0) */
fmt = REAL_MODE_FORMAT (mode);
- real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
/* xa = xa + 0.5 */
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
-#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
#undef TARGET_PROMOTE_PROTOTYPES
-#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+#undef TARGET_C_MODE_FOR_SUFFIX
+#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
+
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
-#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
-#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ix86_mangle_type
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail