rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
-/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
-
/* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
+ X86_64_AVX_CLASS,
X86_64_SSE_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
+static void ix86_add_new_builtins (int);
enum ix86_function_specific_strings
{
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
struct cl_target_option *);
-static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
-static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
+static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
+static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+#define OPTION_MASK_ISA_AVX_SET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
+#define OPTION_MASK_ISA_FMA_SET \
+ (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
+#define OPTION_MASK_ISA_SSE4_2_UNSET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
+#define OPTION_MASK_ISA_AVX_UNSET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
+#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
}
return true;
+ case OPT_mavx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
+ }
+ return true;
+
+ case OPT_mfma:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+ }
+ return true;
+
case OPT_msse4:
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
PTA_SSE4_2 = 1 << 15,
PTA_SSE5 = 1 << 16,
PTA_AES = 1 << 17,
- PTA_PCLMUL = 1 << 18
+ PTA_PCLMUL = 1 << 18,
+ PTA_AVX = 1 << 19,
+ PTA_FMA = 1 << 20
};
static struct pta
int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set up prefix/suffix so the error messages refer to either the command
- line argument, or the attribute(option). */
+ line argument, or the attribute(target). */
if (main_args_p)
{
prefix = "-m";
if (processor_alias_table[i].flags & PTA_SSE4_2
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_AVX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX;
+ if (processor_alias_table[i].flags & PTA_FMA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA;
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
}
\f
-/* Inner function to process the attribute((option(...))), take an argument and
+/* Inner function to process the attribute((target(...))), take an argument and
set the current options from the argument. If we have a list, recursively go
over the list. */
static bool
-ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
+ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
{
char *next_optstr;
bool ret = true;
for (; args; args = TREE_CHAIN (args))
if (TREE_VALUE (args)
- && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
+ && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
ret = false;
return ret;
/* Process the option. */
if (opt == N_OPTS)
{
- error ("attribute(option(\"%s\")) is unknown", orig_p);
+ error ("attribute(target(\"%s\")) is unknown", orig_p);
ret = false;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
tree
-ix86_valid_option_attribute_tree (tree args)
+ix86_valid_target_attribute_tree (tree args)
{
const char *orig_arch_string = ix86_arch_string;
const char *orig_tune_string = ix86_tune_string;
= TREE_TARGET_OPTION (target_option_default_node);
/* Process each of the options on the chain. */
- if (! ix86_valid_option_attribute_inner_p (args, option_strings))
+ if (! ix86_valid_target_attribute_inner_p (args, option_strings))
return NULL_TREE;
/* If the changed options are different from the default, rerun override_options,
/* Do any overrides, such as arch=xxx, or tune=xxx support. */
override_options (false);
+ /* Add any builtin functions with the new isa if any. */
+ ix86_add_new_builtins (ix86_isa_flags);
+
/* Save the current options unless we are validating options for
#pragma. */
t = build_target_option_node ();
return t;
}
-/* Hook to validate attribute((option("string"))). */
+/* Hook to validate attribute((target("string"))). */
static bool
-ix86_valid_option_attribute_p (tree fndecl,
+ix86_valid_target_attribute_p (tree fndecl,
tree ARG_UNUSED (name),
tree args,
int ARG_UNUSED (flags))
{
- struct cl_target_option cur_opts;
+ struct cl_target_option cur_target;
bool ret = true;
- tree new_opts;
-
- cl_target_option_save (&cur_opts);
- new_opts = ix86_valid_option_attribute_tree (args);
- if (!new_opts)
+ tree old_optimize = build_optimization_node ();
+ tree new_target, new_optimize;
+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ /* If the function changed the optimization levels as well as setting target
+ options, start with the optimizations specified. */
+ if (func_optimize && func_optimize != old_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
+
+ /* The target attributes may also change some optimization flags, so update
+ the optimization options if necessary. */
+ cl_target_option_save (&cur_target);
+ new_target = ix86_valid_target_attribute_tree (args);
+ new_optimize = build_optimization_node ();
+
+ if (!new_target)
ret = false;
else if (fndecl)
- DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
+ {
+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+
+ if (old_optimize != new_optimize)
+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+ }
+
+ cl_target_option_restore (&cur_target);
+
+ if (old_optimize != new_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
- cl_target_option_restore (&cur_opts);
return ret;
}
/* Dllimport'd functions are also called indirectly. */
if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && !TARGET_64BIT
&& decl && DECL_DLLIMPORT_P (decl)
&& ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
return false;
}
if (TARGET_MMX)
cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_avx = true;
cum->warn_sse = true;
cum->warn_mmx = true;
cum->nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
+ cum->warn_avx = 0;
cum->warn_sse = 0;
cum->warn_mmx = 0;
return;
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
case CTImode:
+ case COImode:
+ case OImode:
return 0;
case SFmode:
if (!(bit_offset % 64))
case TCmode:
/* This modes is larger than 16 bytes. */
return 0;
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ classes[0] = X86_64_AVX_CLASS;
+ return 1;
case V4SFmode:
case V4SImode:
case V16QImode:
case X86_64_INTEGERSI_CLASS:
(*int_nregs)++;
break;
+ case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
break;
/* FALLTHRU */
+ case OImode:
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
case TImode:
case V16QImode:
case V8HImode:
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, HOST_WIDE_INT words)
+ tree type, HOST_WIDE_INT words, int named)
{
int int_nregs, sse_nregs;
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named && VALID_AVX256_REG_MODE (mode))
+ return;
+
if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
cum->words += words;
else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, int named ATTRIBUTE_UNUSED)
+ tree type, int named)
{
HOST_WIDE_INT bytes, words;
if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
- function_arg_advance_64 (cum, mode, type, words);
+ function_arg_advance_64 (cum, mode, type, words, named);
else
function_arg_advance_32 (cum, mode, type, bytes, words);
}
enum machine_mode orig_mode, tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
- static bool warnedsse, warnedmmx;
+ static bool warnedavx, warnedsse, warnedmmx;
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
break;
/* FALLTHRU */
case TImode:
+ /* In 32bit, we pass TImode in xmm registers. */
case V16QImode:
case V8HImode:
case V4SImode:
}
break;
+ case OImode:
+ /* In 32bit, we pass OImode in ymm registers. */
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_AVX && !warnedavx && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
case V8QImode:
case V4HImode:
case V2SImode:
static rtx
function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- enum machine_mode orig_mode, tree type)
+ enum machine_mode orig_mode, tree type, int named)
{
+ static bool warnedavx;
+
/* Handle a hidden AL argument containing number of registers
for varargs x86-64 functions. */
if (mode == VOIDmode)
: cum->sse_regno)
: -1);
+ switch (mode)
+ {
+ default:
+ break;
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* In 64bit, we pass TImode in interger registers and OImode on
+ stack. */
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_AVX && !warnedavx && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX enabled "
+ "changes the ABI");
+ }
+ }
+
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named)
+ return NULL;
+ break;
+ }
+
return construct_container (mode, orig_mode, type, 0, cum->nregs,
cum->sse_nregs,
&x86_64_int_parameter_registers [cum->regno],
if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
- return function_arg_64 (cum, mode, omode, type);
+ return function_arg_64 (cum, mode, omode, type, named);
else
return function_arg_32 (cum, mode, omode, type, bytes, words);
}
int i;
int regparm = ix86_regparm;
- if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+ if (cum->call_abi != DEFAULT_ABI)
regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
- if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
- return;
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+ else
+ ix86_varargs_gpr_size = 0;
+
+ /* FPR size of varargs save area. We don't need it if we don't pass
+ anything in SSE registers. */
+ if (cum->sse_nregs && cfun->va_list_fpr_size)
+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+ else
+ ix86_varargs_fpr_size = 0;
- /* Indicate to allocate space on the stack for varargs save area. */
- ix86_save_varrargs_registers = 1;
+ if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+ return;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
x86_64_int_parameter_registers[i]));
}
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (ix86_varargs_fpr_size)
{
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
label_ref = gen_rtx_LABEL_REF (Pmode, label);
/* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 */
+ label - eax*4 + nnamed_sse_arguments*4 Or
+ label - eax*5 + nnamed_sse_arguments*5 for AVX. */
tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
gen_rtx_MULT (Pmode, nsse_reg,
GEN_INT (4))));
+
+ /* vmovaps is one byte longer than movaps. */
+ if (TARGET_AVX)
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_PLUS (Pmode, tmp_reg,
+ nsse_reg)));
+
if (cum->sse_regno)
emit_move_insn
(nsse_reg,
gen_rtx_CONST (DImode,
gen_rtx_PLUS (DImode,
label_ref,
- GEN_INT (cum->sse_regno * 4))));
+ GEN_INT (cum->sse_regno
+ * (TARGET_AVX ? 5 : 4)))));
else
emit_move_insn (nsse_reg, label_ref);
emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
- instruction size limited by 4 bytes. */
+ instruction size limited by 4 bytes (5 bytes for AVX) with one
+ byte displacement. */
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
- 8 * X86_64_REGPARM_MAX + 127)));
+ ix86_varargs_gpr_size + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
- if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
else
setup_incoming_varargs_64 (&next_cum);
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- if (cfun->va_list_fpr_size)
+ if (TARGET_SSE && cfun->va_list_fpr_size)
{
type = TREE_TYPE (fpr);
t = build2 (MODIFY_EXPR, type, fpr,
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
{
/* Find the register save area.
Prologue of the function save it right above stack frame. */
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
+ if (!ix86_varargs_gpr_size)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (-8 * X86_64_REGPARM_MAX));
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
nat_mode = type_natural_mode (type);
- container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
- X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
- intreg, 0);
+ switch (nat_mode)
+ {
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (ix86_cfun_abi () == SYSV_ABI)
+ {
+ container = NULL;
+ break;
+ }
+
+ default:
+ container = construct_container (nat_mode, TYPE_MODE (type),
+ type, 0, X86_64_REGPARM_MAX,
+ X86_64_SSE_REGPARM_MAX, intreg,
+ 0);
+ break;
+ }
/* Pull the value out of the saved registers. */
/* For XFmode constants, try to find a special 80387 instruction when
optimizing for size or on those CPUs that benefit from them. */
if (mode == XFmode
- && (optimize_insn_for_size_p () || TARGET_EXT_80387_CONSTANTS))
+ && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
{
int i;
}
}
-/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
- */
+/* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
+ SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
+ modes and AVX is enabled. */
+
int
standard_sse_constant_p (rtx x)
{
if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
return 1;
- if (vector_all_ones_operand (x, mode)
- && standard_sse_mode_p (mode))
- return TARGET_SSE2 ? 2 : -1;
+ if (vector_all_ones_operand (x, mode))
+ {
+ if (standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -2;
+ else if (VALID_AVX256_REG_MODE (mode))
+ return TARGET_AVX ? 3 : -3;
+ }
return 0;
}
switch (standard_sse_constant_p (x))
{
case 1:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else if (get_attr_mode (insn) == MODE_V2DF)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ case MODE_V2DF:
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ case MODE_TI:
+ return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_OI:
+ return "vpxor\t%x0, %x0, %x0";
+ default:
+ gcc_unreachable ();
+ }
case 2:
- return "pcmpeqd\t%0, %0";
+ if (TARGET_AVX)
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ case MODE_V2DF:
+ case MODE_TI:
+ return "vpcmpeqd\t%0, %0, %0";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else
+ return "pcmpeqd\t%0, %0";
}
gcc_unreachable ();
}
Recompute the value as needed. Do not recompute when amount of registers
didn't change as reload does multiple calls to the function and does not
expect the decision to change within single iteration. */
- if (!optimize_size
+ if (!optimize_function_for_size_p (cfun)
&& cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
{
int count = frame->nregs;
offset += frame->nregs * UNITS_PER_WORD;
/* Va-arg area */
- if (ix86_save_varrargs_registers)
- {
- offset += X86_64_VARARGS_SIZE;
- frame->va_arg_size = X86_64_VARARGS_SIZE;
- }
- else
- frame->va_arg_size = 0;
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+ offset += frame->va_arg_size;
/* Align start of frame for local function. */
frame->padding1 = ((offset + stack_alignment_needed - 1)
+ frame.nregs * UNITS_PER_WORD),
style);
/* If not an i386, mov & pop is faster than "leave". */
- else if (TARGET_USE_LEAVE || optimize_size
+ else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
|| !cfun->machine->use_fast_prologue_epilogue)
emit_insn ((*ix86_gen_leave) ());
else
disp = const0_rtx;
/* Special case: on K6, [%esi] makes the instruction vector decoded.
- Avoid this by transforming to [%esi+0]. */
- if (TARGET_K6 && !optimize_size
+ Avoid this by transforming to [%esi+0].
+ Reload calls address legitimization without cfun defined, so we need
+ to test cfun for being non-NULL. */
+ if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
&& base_reg && !index_reg && !disp
&& REG_P (base_reg)
&& REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
requires to two regs - that would mean more pseudos with longer
lifetimes. */
static int
-ix86_address_cost (rtx x)
+ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
struct ix86_address parts;
int cost = 1;
If CODE is 'b', pretend the mode is QImode.
If CODE is 'k', pretend the mode is SImode.
If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'x', pretend the mode is V4SFmode.
+ If CODE is 't', pretend the mode is V8SFmode.
If CODE is 'h', pretend the reg is the 'high' byte register.
- If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
+ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+ If CODE is 'd', duplicate the operand for AVX instruction.
+ */
void
print_reg (rtx x, int code, FILE *file)
{
+ const char *reg;
+ bool duplicated = code == 'd' && TARGET_AVX;
+
gcc_assert (x == pc_rtx
|| (REGNO (x) != ARG_POINTER_REGNUM
&& REGNO (x) != FRAME_POINTER_REGNUM
code = 3;
else if (code == 'h')
code = 0;
+ else if (code == 'x')
+ code = 16;
+ else if (code == 't')
+ code = 32;
else
code = GET_MODE_SIZE (GET_MODE (x));
}
return;
}
+
+ reg = NULL;
switch (code)
{
case 3:
if (STACK_TOP_P (x))
{
- fputs ("st(0)", file);
+ reg = "st(0)";
break;
}
/* FALLTHRU */
case 16:
case 2:
normal:
- fputs (hi_reg_name[REGNO (x)], file);
+ reg = hi_reg_name[REGNO (x)];
break;
case 1:
if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
goto normal;
- fputs (qi_reg_name[REGNO (x)], file);
+ reg = qi_reg_name[REGNO (x)];
break;
case 0:
if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
goto normal;
- fputs (qi_high_reg_name[REGNO (x)], file);
+ reg = qi_high_reg_name[REGNO (x)];
+ break;
+ case 32:
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('y', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
break;
default:
gcc_unreachable ();
}
+
+ fputs (reg, file);
+ if (duplicated)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, ", %%%s", reg);
+ else
+ fprintf (file, ", %s", reg);
+ }
}
/* Locate some local-dynamic symbol still in use by this function
w -- likewise, print the HImode name of the register.
k -- likewise, print the SImode name of the register.
q -- likewise, print the DImode name of the register.
+ x -- likewise, print the V4SFmode name of the register.
+ t -- likewise, print the V8SFmode name of the register.
h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
y -- print "st(0)" instead of "st" as a register.
+ d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
P -- if PIC, print an @PLT suffix.
X -- don't print any sort of PIC '@' suffix for a symbol.
gcc_unreachable ();
}
+ case 'd':
case 'b':
case 'w':
case 'k':
case 'q':
case 'h':
+ case 't':
case 'y':
+ case 'x':
case 'X':
case 'P':
break;
/* Little bit of braindamage here. The SSE compare instructions
does use completely different names for the comparisons that the
fp conditional moves. */
- switch (GET_CODE (x))
+ if (TARGET_AVX)
{
- case EQ:
- case UNEQ:
- fputs ("eq", file);
- break;
- case LT:
- case UNLT:
- fputs ("lt", file);
- break;
- case LE:
- case UNLE:
- fputs ("le", file);
- break;
- case UNORDERED:
- fputs ("unord", file);
- break;
- case NE:
- case LTGT:
- fputs ("neq", file);
- break;
- case UNGE:
- case GE:
- fputs ("nlt", file);
- break;
- case UNGT:
- case GT:
- fputs ("nle", file);
- break;
- case ORDERED:
- fputs ("ord", file);
- break;
- default:
- gcc_unreachable ();
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case UNEQ:
+ fputs ("eq_us", file);
+ break;
+ case LT:
+ fputs ("lt", file);
+ break;
+ case UNLT:
+ fputs ("nge", file);
+ break;
+ case LE:
+ fputs ("le", file);
+ break;
+ case UNLE:
+ fputs ("ngt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ fputs ("neq", file);
+ break;
+ case LTGT:
+ fputs ("neq_oq", file);
+ break;
+ case GE:
+ fputs ("ge", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case GT:
+ fputs ("gt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
return;
case 'O':
{
rtx x;
- if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
+ if (!optimize
+ || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
return;
x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
- static char buf[30];
+ static char buf[40];
const char *p;
const char *ssep;
int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
p = "fiadd";
else
p = "fadd";
- ssep = "add";
+ ssep = "vadd";
break;
case MINUS:
p = "fisub";
else
p = "fsub";
- ssep = "sub";
+ ssep = "vsub";
break;
case MULT:
p = "fimul";
else
p = "fmul";
- ssep = "mul";
+ ssep = "vmul";
break;
case DIV:
p = "fidiv";
else
p = "fdiv";
- ssep = "div";
+ ssep = "vdiv";
break;
default:
if (is_sse)
{
- strcpy (buf, ssep);
- if (GET_MODE (operands[0]) == SFmode)
- strcat (buf, "ss\t{%2, %0|%0, %2}");
- else
- strcat (buf, "sd\t{%2, %0|%0, %2}");
+ if (TARGET_AVX)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
+ else
+ strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
+ }
+ else
+ {
+ strcpy (buf, ssep + 1);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ }
return buf;
}
strcpy (buf, p);
emit_insn (gen_x86_fnstcw_1 (stored_mode));
emit_move_insn (reg, copy_rtx (stored_mode));
- if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
+ || optimize_function_for_size_p (cfun))
{
switch (mode)
{
if (is_sse)
{
+ static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
+ static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
+ static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
+ static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
+
if (GET_MODE (operands[0]) == SFmode)
if (unordered_p)
- return "ucomiss\t{%1, %0|%0, %1}";
+ return &ucomiss[TARGET_AVX ? 0 : 1];
else
- return "comiss\t{%1, %0|%0, %1}";
+ return &comiss[TARGET_AVX ? 0 : 1];
else
if (unordered_p)
- return "ucomisd\t{%1, %0|%0, %1}";
+ return &ucomisd[TARGET_AVX ? 0 : 1];
else
- return "comisd\t{%1, %0|%0, %1}";
+ return &comisd[TARGET_AVX ? 0 : 1];
}
gcc_assert (STACK_TOP_P (cmp_op0));
op0 = operands[0];
op1 = operands[1];
+ if (TARGET_AVX)
+ {
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_avx_movdqu (op0, op1));
+ break;
+ case 32:
+ op0 = gen_lowpart (V32QImode, op0);
+ op1 = gen_lowpart (V32QImode, op1);
+ emit_insn (gen_avx_movdqu256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case MODE_VECTOR_FLOAT:
+ op0 = gen_lowpart (mode, op0);
+ op1 = gen_lowpart (mode, op1);
+
+ switch (mode)
+ {
+ case V4SFmode:
+ emit_insn (gen_avx_movups (op0, op1));
+ break;
+ case V8SFmode:
+ emit_insn (gen_avx_movups256 (op0, op1));
+ break;
+ case V2DFmode:
+ emit_insn (gen_avx_movupd (op0, op1));
+ break;
+ case V4DFmode:
+ emit_insn (gen_avx_movupd256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
if (MEM_P (op1))
{
/* If we're optimizing for size, movups is the smallest. */
if (req_mode == CCZmode)
return 0;
/* FALLTHRU */
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
case CCZmode:
break;
*/
if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- && BRANCH_COST >= 2)
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
{
if (cf == 0)
{
optab op;
rtx var, orig_out, out, tmp;
- if (BRANCH_COST <= 2)
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
return 0; /* FAIL */
/* If one of the two operands is an interesting constant, load a
int *dynamic_check)
{
const struct stringop_algs * algs;
+ bool optimize_for_speed;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
&& alg != rep_prefix_8_byte))
const struct processor_costs *cost;
- cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost;
+ /* Even if the string operation call is cold, we still might spend a lot
+ of time processing large blocks. */
+ if (optimize_function_for_size_p (cfun)
+ || (optimize_insn_for_size_p ()
+ && expected_size != -1 && expected_size < 256))
+ optimize_for_speed = false;
+ else
+ optimize_for_speed = true;
+
+ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
*dynamic_check = -1;
if (memset)
if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
return stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
- else if (optimize_insn_for_size_p ())
+ else if (!optimize_for_speed)
{
if (!count || (count & 3))
return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
int desired_align = 0;
enum stringop_alg alg;
int dynamic_check;
+ bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
case no_stringop:
gcc_unreachable ();
case loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode);
break;
case unrolled_loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
break;
case rep_prefix_8_byte:
size_needed = 4;
break;
case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
case loop_1_byte:
+ need_zero_guard = true;
size_needed = 1;
break;
}
dst = change_address (dst, BLKmode, destreg);
expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
desired_align);
+ if (need_zero_guard && !count)
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
}
if (label && size_needed == 1)
{
rtx promoted_val = NULL;
bool force_loopy_epilogue = false;
int dynamic_check;
+ bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
case no_stringop:
gcc_unreachable ();
case loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode);
break;
case unrolled_loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode) * 4;
break;
case rep_prefix_8_byte:
size_needed = 4;
break;
case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
case loop_1_byte:
+ need_zero_guard = true;
size_needed = 1;
break;
}
dst = change_address (dst, BLKmode, destreg);
expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
desired_align);
+ if (need_zero_guard && !count)
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
}
if (label && size_needed == 1)
{
}
return 0;
}
+
+/* Compute default value for "length_vex" attribute. It includes
+ 2 or 3 byte VEX prefix and 1 opcode byte. */
+
+int
+ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
+ int has_vex_w)
+{
+ int i;
+
+ /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
+ byte VEX prefix. */
+ if (!has_0f_opcode || has_vex_w)
+ return 3 + 1;
+
+ /* We can always use 2 byte VEX prefix in 32bit. */
+ if (!TARGET_64BIT)
+ return 2 + 1;
+
+ extract_insn_cached (insn);
+
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (REG_P (recog_data.operand[i]))
+ {
+ /* REX.W bit uses 3 byte VEX prefix. */
+ if (GET_MODE (recog_data.operand[i]) == DImode)
+ return 3 + 1;
+ }
+ else
+ {
+ /* REX.X or REX.B bits use 3 byte VEX prefix. */
+ if (MEM_P (recog_data.operand[i])
+ && x86_extended_reg_mentioned_p (recog_data.operand[i]))
+ return 3 + 1;
+ }
+
+ return 2 + 1;
+}
\f
/* Return the maximum number of instructions a cpu can issue. */
/* PCLMUL instruction */
IX86_BUILTIN_PCLMULQDQ128,
+ /* AVX */
+ IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256,
+ IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256,
+ IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256,
+ IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256,
+ IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256,
+ IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256,
+ IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256,
+ IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_HADDPD256,
+ IX86_BUILTIN_HADDPS256,
+ IX86_BUILTIN_HSUBPD256,
+ IX86_BUILTIN_HSUBPS256,
+ IX86_BUILTIN_MAXPD256,
+ IX86_BUILTIN_MAXPS256,
+ IX86_BUILTIN_MINPD256,
+ IX86_BUILTIN_MINPS256,
+ IX86_BUILTIN_MULPD256,
+ IX86_BUILTIN_MULPS256,
+ IX86_BUILTIN_ORPD256,
+ IX86_BUILTIN_ORPS256,
+ IX86_BUILTIN_SHUFPD256,
+ IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_SUBPD256,
+ IX86_BUILTIN_SUBPS256,
+ IX86_BUILTIN_XORPD256,
+ IX86_BUILTIN_XORPS256,
+ IX86_BUILTIN_CMPSD,
+ IX86_BUILTIN_CMPSS,
+ IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS,
+ IX86_BUILTIN_CMPPD256,
+ IX86_BUILTIN_CMPPS256,
+ IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256,
+ IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256,
+ IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256,
+ IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256,
+ IX86_BUILTIN_EXTRACTF128PD256,
+ IX86_BUILTIN_EXTRACTF128PS256,
+ IX86_BUILTIN_EXTRACTF128SI256,
+ IX86_BUILTIN_VZEROALL,
+ IX86_BUILTIN_VZEROUPPER,
+ IX86_BUILTIN_VZEROUPPER_REX64,
+ IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS,
+ IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256,
+ IX86_BUILTIN_VPERMILPD,
+ IX86_BUILTIN_VPERMILPS,
+ IX86_BUILTIN_VPERMILPD256,
+ IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERMIL2PD,
+ IX86_BUILTIN_VPERMIL2PS,
+ IX86_BUILTIN_VPERMIL2PD256,
+ IX86_BUILTIN_VPERMIL2PS256,
+ IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256,
+ IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VBROADCASTSS,
+ IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256,
+ IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256,
+ IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256,
+ IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256,
+ IX86_BUILTIN_MASKLOADPD,
+ IX86_BUILTIN_MASKLOADPS,
+ IX86_BUILTIN_MASKSTOREPD,
+ IX86_BUILTIN_MASKSTOREPS,
+ IX86_BUILTIN_MASKLOADPD256,
+ IX86_BUILTIN_MASKLOADPS256,
+ IX86_BUILTIN_MASKSTOREPD256,
+ IX86_BUILTIN_MASKSTOREPS256,
+ IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256,
+ IX86_BUILTIN_MOVDDUP256,
+
+ IX86_BUILTIN_SQRTPD256,
+ IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256,
+ IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256,
+
+ IX86_BUILTIN_RCPPS256,
+
+ IX86_BUILTIN_ROUNDPD256,
+ IX86_BUILTIN_ROUNDPS256,
+
+ IX86_BUILTIN_UNPCKHPD256,
+ IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256,
+ IX86_BUILTIN_UNPCKLPS256,
+
+ IX86_BUILTIN_SI256_SI,
+ IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD,
+ IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256,
+ IX86_BUILTIN_PD_PD256,
+
+ IX86_BUILTIN_VTESTZPD,
+ IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD,
+ IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS,
+ IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256,
+ IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256,
+ IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256,
+ IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256,
+ IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256,
+
+ IX86_BUILTIN_MOVMSKPD256,
+ IX86_BUILTIN_MOVMSKPS256,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_FABSQ,
IX86_BUILTIN_FNMSUBSD,
IX86_BUILTIN_FNMSUBPS,
IX86_BUILTIN_FNMSUBPD,
+ IX86_BUILTIN_PCMOV,
IX86_BUILTIN_PCMOV_V2DI,
IX86_BUILTIN_PCMOV_V4SI,
IX86_BUILTIN_PCMOV_V8HI,
/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
-/* Table to record which ISA options the builtin needs. */
-static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+/* Table of all of the builtin functions that are possible with different ISA's
+ but are waiting to be built until a function is declared to use that
+ ISA. */
+struct builtin_isa GTY(())
+{
+ tree type; /* builtin type to use in the declaration */
+ const char *name; /* function name */
+ int isa; /* isa_flags this builtin is defined for */
+ bool const_p; /* true if the declaration is constant */
+};
+
+static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+
/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
* of which isa_flags to use in the ix86_builtins_isa array. Stores the
* function decl in the ix86_builtins array. Returns the function decl or
* NULL_TREE, if the builtin was not added.
*
- * Record all builtins, even if it isn't an instruction set in the current ISA
- * in case the user uses function specific options for a different ISA. When
- * the builtin is expanded, check at that time whether it is valid. */
+ * If the front end has a special hook for builtin functions, delay adding
+ * builtin functions that aren't in the current ISA until the ISA is changed
+ * with function specific optimization. Doing so, can save about 300K for the
+ * default compiler. When the builtin is expanded, check at that time whether
+ * it is valid.
+ *
+ * If the front end doesn't have a special hook, record all builtins, even if
+ * it isn't an instruction set in the current ISA in case the user uses
+ * function specific options for a different ISA, so that we don't get scope
+ * errors if a builtin is added in the middle of a function scope. */
static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
{
- decl = add_builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
- ix86_builtins[(int) code] = decl;
- ix86_builtins_isa[(int) code] = mask;
- }
+ ix86_builtins_isa[(int) code].isa = mask;
- return decl;
-}
+ if ((mask & ix86_isa_flags) != 0
+ || (lang_hooks.builtin_function
+ == lang_hooks.builtin_function_ext_scope))
-/* Like def_builtin, but also marks the function decl "const". */
+ {
+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ ix86_builtins[(int) code] = decl;
+ ix86_builtins_isa[(int) code].type = NULL_TREE;
+ }
+ else
+ {
+ ix86_builtins[(int) code] = NULL_TREE;
+ ix86_builtins_isa[(int) code].const_p = false;
+ ix86_builtins_isa[(int) code].type = type;
+ ix86_builtins_isa[(int) code].name = name;
+ }
+ }
+
+ return decl;
+}
+
+/* Like def_builtin, but also marks the function decl "const". */
static inline tree
def_builtin_const (int mask, const char *name, tree type,
tree decl = def_builtin (mask, name, type, code);
if (decl)
TREE_READONLY (decl) = 1;
+ else
+ ix86_builtins_isa[(int) code].const_p = true;
+
return decl;
}
+/* Add any new builtin functions for a given ISA that may not have been
+ declared. This saves a bit of space compared to adding all of the
+ declarations to the tree, even if we didn't use them. */
+
+static void
+ix86_add_new_builtins (int isa)
+{
+ int i;
+ tree decl;
+
+ for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
+ {
+ if ((ix86_builtins_isa[i].isa & isa) != 0
+ && ix86_builtins_isa[i].type != NULL_TREE)
+ {
+ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
+ ix86_builtins_isa[i].type,
+ i, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ ix86_builtins[i] = decl;
+ ix86_builtins_isa[i].type = NULL_TREE;
+ if (ix86_builtins_isa[i].const_p)
+ TREE_READONLY (decl) = 1;
+ }
+ }
+}
+
/* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
{
SPECIAL_FTYPE_UNKNOWN,
VOID_FTYPE_VOID,
+ V32QI_FTYPE_PCCHAR,
V16QI_FTYPE_PCCHAR,
+ V8SF_FTYPE_PCV4SF,
+ V8SF_FTYPE_PCFLOAT,
+ V4DF_FTYPE_PCV2DF,
+ V4DF_FTYPE_PCDOUBLE,
V4SF_FTYPE_PCFLOAT,
V2DF_FTYPE_PCDOUBLE,
+ V8SF_FTYPE_PCV8SF_V8SF,
+ V4DF_FTYPE_PCV4DF_V4DF,
V4SF_FTYPE_V4SF_PCV2SF,
+ V4SF_FTYPE_PCV4SF_V4SF,
V2DF_FTYPE_V2DF_PCDOUBLE,
+ V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
VOID_FTYPE_PV2DI_V2DI,
+ VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
+ VOID_FTYPE_PFLOAT_V8SF,
VOID_FTYPE_PFLOAT_V4SF,
+ VOID_FTYPE_PDOUBLE_V4DF,
VOID_FTYPE_PDOUBLE_V2DF,
VOID_FTYPE_PDI_DI,
- VOID_FTYPE_PINT_INT
+ VOID_FTYPE_PINT_INT,
+ VOID_FTYPE_PV8SF_V8SF_V8SF,
+ VOID_FTYPE_PV4DF_V4DF_V4DF,
+ VOID_FTYPE_PV4SF_V4SF_V4SF,
+ VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* Builtin types */
FLOAT128_FTYPE_FLOAT128,
FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST,
+ INT_FTYPE_V4DF_V4DF_PTEST,
+ INT_FTYPE_V4SF_V4SF_PTEST,
INT_FTYPE_V2DI_V2DI_PTEST,
+ INT_FTYPE_V2DF_V2DF_PTEST,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
INT_FTYPE_V16QI,
INT_FTYPE_V8QI,
+ INT_FTYPE_V8SF,
+ INT_FTYPE_V4DF,
INT_FTYPE_V4SF,
INT_FTYPE_V2DF,
V16QI_FTYPE_V16QI,
+ V8SI_FTYPE_V8SF,
+ V8SI_FTYPE_V4SI,
V8HI_FTYPE_V8HI,
V8HI_FTYPE_V16QI,
V8QI_FTYPE_V8QI,
+ V8SF_FTYPE_V8SF,
+ V8SF_FTYPE_V8SI,
+ V8SF_FTYPE_V4SF,
V4SI_FTYPE_V4SI,
V4SI_FTYPE_V16QI,
+ V4SI_FTYPE_V8SI,
V4SI_FTYPE_V8HI,
+ V4SI_FTYPE_V4DF,
V4SI_FTYPE_V4SF,
V4SI_FTYPE_V2DF,
V4HI_FTYPE_V4HI,
+ V4DF_FTYPE_V4DF,
+ V4DF_FTYPE_V4SI,
+ V4DF_FTYPE_V4SF,
+ V4DF_FTYPE_V2DF,
+ V4SF_FTYPE_V4DF,
V4SF_FTYPE_V4SF,
V4SF_FTYPE_V4SF_VEC_MERGE,
+ V4SF_FTYPE_V8SF,
V4SF_FTYPE_V4SI,
V4SF_FTYPE_V2DF,
V2DI_FTYPE_V2DI,
V2DF_FTYPE_V2DF,
V2DF_FTYPE_V2DF_VEC_MERGE,
V2DF_FTYPE_V4SI,
+ V2DF_FTYPE_V4DF,
V2DF_FTYPE_V4SF,
V2DF_FTYPE_V2SI,
V2SI_FTYPE_V2SI,
V8HI_FTYPE_V16QI_V16QI,
V8HI_FTYPE_V4SI_V4SI,
V8HI_FTYPE_V8HI_SI_COUNT,
+ V8SF_FTYPE_V8SF_V8SF,
+ V8SF_FTYPE_V8SF_V8SI,
V4SI_FTYPE_V4SI_V4SI,
V4SI_FTYPE_V4SI_V4SI_COUNT,
V4SI_FTYPE_V8HI_V8HI,
V4HI_FTYPE_V8QI_V8QI,
V4HI_FTYPE_V2SI_V2SI,
V4HI_FTYPE_V4HI_SI_COUNT,
+ V4DF_FTYPE_V4DF_V4DF,
+ V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SF,
V4SF_FTYPE_V4SF_V4SF_SWAP,
+ V4SF_FTYPE_V4SF_V4SI,
V4SF_FTYPE_V4SF_V2SI,
V4SF_FTYPE_V4SF_V2DF,
V4SF_FTYPE_V4SF_DI,
V2DF_FTYPE_V2DF_V2DF,
V2DF_FTYPE_V2DF_V2DF_SWAP,
V2DF_FTYPE_V2DF_V4SF,
+ V2DF_FTYPE_V2DF_V2DI,
V2DF_FTYPE_V2DF_DI,
V2DF_FTYPE_V2DF_SI,
V2SF_FTYPE_V2SF_V2SF,
V8HI_FTYPE_V8HI_INT,
V4SI_FTYPE_V4SI_INT,
V4HI_FTYPE_V4HI_INT,
+ V8SF_FTYPE_V8SF_INT,
+ V4SI_FTYPE_V8SI_INT,
+ V4SF_FTYPE_V8SF_INT,
+ V2DF_FTYPE_V4DF_INT,
+ V4DF_FTYPE_V4DF_INT,
V4SF_FTYPE_V4SF_INT,
V2DI_FTYPE_V2DI_INT,
V2DI2TI_FTYPE_V2DI_INT,
V2DF_FTYPE_V2DF_INT,
V16QI_FTYPE_V16QI_V16QI_V16QI,
+ V8SF_FTYPE_V8SF_V8SF_V8SF,
+ V4DF_FTYPE_V4DF_V4DF_V4DF,
V4SF_FTYPE_V4SF_V4SF_V4SF,
V2DF_FTYPE_V2DF_V2DF_V2DF,
V16QI_FTYPE_V16QI_V16QI_INT,
+ V8SI_FTYPE_V8SI_V8SI_INT,
+ V8SI_FTYPE_V8SI_V4SI_INT,
V8HI_FTYPE_V8HI_V8HI_INT,
+ V8SF_FTYPE_V8SF_V8SF_INT,
+ V8SF_FTYPE_V8SF_V4SF_INT,
V4SI_FTYPE_V4SI_V4SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_INT,
+ V4DF_FTYPE_V4DF_V2DF_INT,
V4SF_FTYPE_V4SF_V4SF_INT,
V2DI_FTYPE_V2DI_V2DI_INT,
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
+ V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
+ V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
};
/* Builtins with variable number of arguments. */
/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
/* SSE5 */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
float_type_node,
NULL_TREE);
+ /* AVX builtins */
+ tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
+ V32QImode);
+ tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
+ V8SImode);
+ tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
+ V8SFmode);
+ tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
+ V4DImode);
+ tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
+ V4DFmode);
+ tree v8sf_ftype_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8sf
+ = build_function_type_list (V8SI_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4df
+ = build_function_type_list (V4SI_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4si
+ = build_function_type_list (V4DF_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4sf
+ = build_function_type_list (V4DF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4df
+ = build_function_type_list (V4SF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si_int
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf_int
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df_int
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v8si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V8SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_v8si_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ V8SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_v4di_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ V4DI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_v4si_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ V4SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_v2df_v2di_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ V2DI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_pcfloat
+ = build_function_type_list (V8SF_type_node,
+ pcfloat_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcdouble
+ = build_function_type_list (V4DF_type_node,
+ pcdouble_type_node,
+ NULL_TREE);
+ tree pcv4sf_type_node
+ = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
+ tree pcv2df_type_node
+ = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv4sf
+ = build_function_type_list (V8SF_type_node,
+ pcv4sf_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv2df
+ = build_function_type_list (V4DF_type_node,
+ pcv2df_type_node,
+ NULL_TREE);
+ tree v32qi_ftype_pcchar
+ = build_function_type_list (V32QI_type_node,
+ pcchar_type_node,
+ NULL_TREE);
+ tree void_ftype_pchar_v32qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V32QI_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v4si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v4sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V4SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v2df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree void_ftype_pfloat_v8sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pdouble_v4df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
+ tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
+ tree pv4df_type_node = build_pointer_type (V4DF_type_node);
+ tree pv2df_type_node = build_pointer_type (V2DF_type_node);
+ tree pcv8sf_type_node
+ = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
+ tree pcv4df_type_node
+ = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ pcv8sf_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ pcv4df_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_pcv4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ pcv4sf_type_node, V4SF_type_node,
+ NULL_TREE);
+ tree v2df_ftype_pcv2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ pcv2df_type_node, V2DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv8sf_v8sf_v8sf
+ = build_function_type_list (void_type_node,
+ pv8sf_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4df_v4df_v4df
+ = build_function_type_list (void_type_node,
+ pv4df_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4sf_v4sf_v4sf
+ = build_function_type_list (void_type_node,
+ pv4sf_type_node, V4SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv2df_v2df_v2df
+ = build_function_type_list (void_type_node,
+ pv2df_type_node, V2DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v2df
+ = build_function_type_list (V4DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v4sf
+ = build_function_type_list (V8SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v4si
+ = build_function_type_list (V8SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v4di_v4di
+ = build_function_type_list (integer_type_node,
+ V4DI_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4di
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2di
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DI_type_node, NULL_TREE);
+
tree ftype;
/* Add all special builtins with variable number of operands. */
case VOID_FTYPE_VOID:
type = void_ftype_void;
break;
+ case V32QI_FTYPE_PCCHAR:
+ type = v32qi_ftype_pcchar;
+ break;
case V16QI_FTYPE_PCCHAR:
type = v16qi_ftype_pcchar;
break;
+ case V8SF_FTYPE_PCV4SF:
+ type = v8sf_ftype_pcv4sf;
+ break;
+ case V8SF_FTYPE_PCFLOAT:
+ type = v8sf_ftype_pcfloat;
+ break;
+ case V4DF_FTYPE_PCV2DF:
+ type = v4df_ftype_pcv2df;
+ break;
+ case V4DF_FTYPE_PCDOUBLE:
+ type = v4df_ftype_pcdouble;
+ break;
case V4SF_FTYPE_PCFLOAT:
type = v4sf_ftype_pcfloat;
break;
case V2DF_FTYPE_PCDOUBLE:
type = v2df_ftype_pcdouble;
break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ type = v8sf_ftype_pcv8sf_v8sf;
+ break;
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ type = v4df_ftype_pcv4df_v4df;
+ break;
case V4SF_FTYPE_V4SF_PCV2SF:
type = v4sf_ftype_v4sf_pcv2sf;
break;
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ type = v4sf_ftype_pcv4sf_v4sf;
+ break;
case V2DF_FTYPE_V2DF_PCDOUBLE:
type = v2df_ftype_v2df_pcdouble;
break;
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ type = v2df_ftype_pcv2df_v2df;
+ break;
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
+ case VOID_FTYPE_PCHAR_V32QI:
+ type = void_ftype_pchar_v32qi;
+ break;
case VOID_FTYPE_PCHAR_V16QI:
type = void_ftype_pchar_v16qi;
break;
+ case VOID_FTYPE_PFLOAT_V8SF:
+ type = void_ftype_pfloat_v8sf;
+ break;
case VOID_FTYPE_PFLOAT_V4SF:
type = void_ftype_pfloat_v4sf;
break;
+ case VOID_FTYPE_PDOUBLE_V4DF:
+ type = void_ftype_pdouble_v4df;
+ break;
case VOID_FTYPE_PDOUBLE_V2DF:
type = void_ftype_pdouble_v2df;
break;
case VOID_FTYPE_PINT_INT:
type = void_ftype_pint_int;
break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ type = void_ftype_pv8sf_v8sf_v8sf;
+ break;
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ type = void_ftype_pv4df_v4df_v4df;
+ break;
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ type = void_ftype_pv4sf_v4sf_v4sf;
+ break;
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ type = void_ftype_pv2df_v2df_v2df;
+ break;
default:
gcc_unreachable ();
}
case FLOAT_FTYPE_FLOAT:
type = float_ftype_float;
break;
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ type = int_ftype_v8sf_v8sf;
+ break;
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ type = int_ftype_v4di_v4di;
+ break;
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ type = int_ftype_v4df_v4df;
+ break;
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ type = int_ftype_v4sf_v4sf;
+ break;
case INT_FTYPE_V2DI_V2DI_PTEST:
type = int_ftype_v2di_v2di;
break;
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ type = int_ftype_v2df_v2df;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
case INT_FTYPE_V8QI:
type = int_ftype_v8qi;
break;
+ case INT_FTYPE_V8SF:
+ type = int_ftype_v8sf;
+ break;
+ case INT_FTYPE_V4DF:
+ type = int_ftype_v4df;
+ break;
case INT_FTYPE_V4SF:
type = int_ftype_v4sf;
break;
case V16QI_FTYPE_V16QI:
type = v16qi_ftype_v16qi;
break;
+ case V8SI_FTYPE_V8SF:
+ type = v8si_ftype_v8sf;
+ break;
+ case V8SI_FTYPE_V4SI:
+ type = v8si_ftype_v4si;
+ break;
case V8HI_FTYPE_V8HI:
type = v8hi_ftype_v8hi;
break;
case V8QI_FTYPE_V8QI:
type = v8qi_ftype_v8qi;
break;
+ case V8SF_FTYPE_V8SF:
+ type = v8sf_ftype_v8sf;
+ break;
+ case V8SF_FTYPE_V8SI:
+ type = v8sf_ftype_v8si;
+ break;
+ case V8SF_FTYPE_V4SF:
+ type = v8sf_ftype_v4sf;
+ break;
+ case V4SI_FTYPE_V4DF:
+ type = v4si_ftype_v4df;
+ break;
case V4SI_FTYPE_V4SI:
type = v4si_ftype_v4si;
break;
case V4SI_FTYPE_V16QI:
type = v4si_ftype_v16qi;
break;
+ case V4SI_FTYPE_V8SI:
+ type = v4si_ftype_v8si;
+ break;
case V4SI_FTYPE_V8HI:
type = v4si_ftype_v8hi;
break;
case V4HI_FTYPE_V4HI:
type = v4hi_ftype_v4hi;
break;
+ case V4DF_FTYPE_V4DF:
+ type = v4df_ftype_v4df;
+ break;
+ case V4DF_FTYPE_V4SI:
+ type = v4df_ftype_v4si;
+ break;
+ case V4DF_FTYPE_V4SF:
+ type = v4df_ftype_v4sf;
+ break;
+ case V4DF_FTYPE_V2DF:
+ type = v4df_ftype_v2df;
+ break;
case V4SF_FTYPE_V4SF:
case V4SF_FTYPE_V4SF_VEC_MERGE:
type = v4sf_ftype_v4sf;
break;
+ case V4SF_FTYPE_V8SF:
+ type = v4sf_ftype_v8sf;
+ break;
case V4SF_FTYPE_V4SI:
type = v4sf_ftype_v4si;
break;
+ case V4SF_FTYPE_V4DF:
+ type = v4sf_ftype_v4df;
+ break;
case V4SF_FTYPE_V2DF:
type = v4sf_ftype_v2df;
break;
case V2SI_FTYPE_V2SF:
type = v2si_ftype_v2sf;
break;
+ case V2DF_FTYPE_V4DF:
+ type = v2df_ftype_v4df;
+ break;
case V2DF_FTYPE_V4SF:
type = v2df_ftype_v4sf;
break;
case V8HI_FTYPE_V8HI_SI_COUNT:
type = v8hi_ftype_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf;
+ break;
+ case V8SF_FTYPE_V8SF_V8SI:
+ type = v8sf_ftype_v8sf_v8si;
+ break;
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V4SI_V4SI_COUNT:
type = v4si_ftype_v4si_v4si;
case V4HI_FTYPE_V4HI_SI_COUNT:
type = v4hi_ftype_v4hi_int;
break;
+ case V4DF_FTYPE_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df;
+ break;
+ case V4DF_FTYPE_V4DF_V4DI:
+ type = v4df_ftype_v4df_v4di;
+ break;
case V4SF_FTYPE_V4SF_V4SF:
case V4SF_FTYPE_V4SF_V4SF_SWAP:
type = v4sf_ftype_v4sf_v4sf;
break;
+ case V4SF_FTYPE_V4SF_V4SI:
+ type = v4sf_ftype_v4sf_v4si;
+ break;
case V4SF_FTYPE_V4SF_V2SI:
type = v4sf_ftype_v4sf_v2si;
break;
case V2DF_FTYPE_V2DF_V4SF:
type = v2df_ftype_v2df_v4sf;
break;
+ case V2DF_FTYPE_V2DF_V2DI:
+ type = v2df_ftype_v2df_v2di;
+ break;
case V2DF_FTYPE_V2DF_DI:
type = v2df_ftype_v2df_int64;
break;
case V8HI_FTYPE_V8HI_INT:
type = v8hi_ftype_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_INT:
+ type = v8sf_ftype_v8sf_int;
+ break;
case V4SI_FTYPE_V4SI_INT:
type = v4si_ftype_v4si_int;
break;
+ case V4SI_FTYPE_V8SI_INT:
+ type = v4si_ftype_v8si_int;
+ break;
case V4HI_FTYPE_V4HI_INT:
type = v4hi_ftype_v4hi_int;
break;
+ case V4DF_FTYPE_V4DF_INT:
+ type = v4df_ftype_v4df_int;
+ break;
case V4SF_FTYPE_V4SF_INT:
type = v4sf_ftype_v4sf_int;
break;
+ case V4SF_FTYPE_V8SF_INT:
+ type = v4sf_ftype_v8sf_int;
+ break;
case V2DI_FTYPE_V2DI_INT:
case V2DI2TI_FTYPE_V2DI_INT:
type = v2di_ftype_v2di_int;
case V2DF_FTYPE_V2DF_INT:
type = v2df_ftype_v2df_int;
break;
+ case V2DF_FTYPE_V4DF_INT:
+ type = v2df_ftype_v4df_int;
+ break;
case V16QI_FTYPE_V16QI_V16QI_V16QI:
type = v16qi_ftype_v16qi_v16qi_v16qi;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf_v8sf;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df_v4df;
+ break;
case V4SF_FTYPE_V4SF_V4SF_V4SF:
type = v4sf_ftype_v4sf_v4sf_v4sf;
break;
case V16QI_FTYPE_V16QI_V16QI_INT:
type = v16qi_ftype_v16qi_v16qi_int;
break;
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ type = v8si_ftype_v8si_v8si_int;
+ break;
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ type = v8si_ftype_v8si_v4si_int;
+ break;
case V8HI_FTYPE_V8HI_V8HI_INT:
type = v8hi_ftype_v8hi_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ type = v8sf_ftype_v8sf_v8sf_int;
+ break;
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ type = v8sf_ftype_v8sf_v4sf_int;
+ break;
case V4SI_FTYPE_V4SI_V4SI_INT:
type = v4si_ftype_v4si_v4si_int;
break;
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ type = v4df_ftype_v4df_v4df_int;
+ break;
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ type = v4df_ftype_v4df_v2df_int;
+ break;
case V4SF_FTYPE_V4SF_V4SF_INT:
type = v4sf_ftype_v4sf_v4sf_int;
break;
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ type = v8sf_ftype_v8sf_v8sf_v8si_int;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ type = v4df_ftype_v4df_v4df_v4di_int;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ type = v4sf_ftype_v4sf_v4sf_v4si_int;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ type = v2df_ftype_v2df_v2df_v2di_int;
+ break;
default:
gcc_unreachable ();
}
/* PCLMUL */
def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
+ /* AVX */
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
+ TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
+
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
switch ((enum ix86_builtin_type) d->flag)
{
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ case INT_FTYPE_V4SF_V4SF_PTEST:
case INT_FTYPE_V2DI_V2DI_PTEST:
+ case INT_FTYPE_V2DF_V2DF_PTEST:
return ix86_expand_sse_ptest (d, exp, target);
case FLOAT128_FTYPE_FLOAT128:
case FLOAT_FTYPE_FLOAT:
case INT64_FTYPE_V2DF:
case INT_FTYPE_V16QI:
case INT_FTYPE_V8QI:
+ case INT_FTYPE_V8SF:
+ case INT_FTYPE_V4DF:
case INT_FTYPE_V4SF:
case INT_FTYPE_V2DF:
case V16QI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8SF:
+ case V8SI_FTYPE_V4SI:
case V8HI_FTYPE_V8HI:
case V8HI_FTYPE_V16QI:
case V8QI_FTYPE_V8QI:
+ case V8SF_FTYPE_V8SF:
+ case V8SF_FTYPE_V8SI:
+ case V8SF_FTYPE_V4SF:
case V4SI_FTYPE_V4SI:
case V4SI_FTYPE_V16QI:
case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8SI:
case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V4DF:
case V4SI_FTYPE_V2DF:
case V4HI_FTYPE_V4HI:
+ case V4DF_FTYPE_V4DF:
+ case V4DF_FTYPE_V4SI:
+ case V4DF_FTYPE_V4SF:
+ case V4DF_FTYPE_V2DF:
case V4SF_FTYPE_V4SF:
case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V8SF:
+ case V4SF_FTYPE_V4DF:
case V4SF_FTYPE_V2DF:
case V2DI_FTYPE_V2DI:
case V2DI_FTYPE_V16QI:
case V2DI_FTYPE_V4SI:
case V2DF_FTYPE_V2DF:
case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4DF:
case V2DF_FTYPE_V4SF:
case V2DF_FTYPE_V2SI:
case V2SI_FTYPE_V2SI:
case V8HI_FTYPE_V8HI_V8HI:
case V8HI_FTYPE_V16QI_V16QI:
case V8HI_FTYPE_V4SI_V4SI:
+ case V8SF_FTYPE_V8SF_V8SF:
+ case V8SF_FTYPE_V8SF_V8SI:
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V8HI_V8HI:
case V4SI_FTYPE_V4SF_V4SF:
case V4HI_FTYPE_V4HI_V4HI:
case V4HI_FTYPE_V8QI_V8QI:
case V4HI_FTYPE_V2SI_V2SI:
+ case V4DF_FTYPE_V4DF_V4DF:
+ case V4DF_FTYPE_V4DF_V4DI:
case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SI:
case V4SF_FTYPE_V4SF_V2SI:
case V4SF_FTYPE_V4SF_V2DF:
case V4SF_FTYPE_V4SF_DI:
case V2SI_FTYPE_V2SF_V2SF:
case V2DF_FTYPE_V2DF_V2DF:
case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DI:
case V2DF_FTYPE_V2DF_DI:
case V2DF_FTYPE_V2DF_SI:
case V2SF_FTYPE_V2SF_V2SF:
nargs_constant = 1;
break;
case V8HI_FTYPE_V8HI_INT:
+ case V8SF_FTYPE_V8SF_INT:
case V4SI_FTYPE_V4SI_INT:
+ case V4SI_FTYPE_V8SI_INT:
case V4HI_FTYPE_V4HI_INT:
+ case V4DF_FTYPE_V4DF_INT:
case V4SF_FTYPE_V4SF_INT:
+ case V4SF_FTYPE_V8SF_INT:
case V2DI_FTYPE_V2DI_INT:
case V2DF_FTYPE_V2DF_INT:
+ case V2DF_FTYPE_V4DF_INT:
nargs = 2;
nargs_constant = 1;
break;
case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
nargs = 3;
break;
case V16QI_FTYPE_V16QI_V16QI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
nargs = 3;
nargs_constant = 2;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ nargs = 4;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
case CODE_FOR_sse4_1_blendps:
+ case CODE_FOR_avx_blendpd256:
+ case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_sse4_1_blendpd:
+ case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_avx_vpermil2v2df3:
+ case CODE_FOR_avx_vpermil2v4sf3:
+ case CODE_FOR_avx_vpermil2v4df3:
+ case CODE_FOR_avx_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
+ case CODE_FOR_avx_vextractf128v4df:
+ case CODE_FOR_avx_vextractf128v8sf:
+ case CODE_FOR_avx_vextractf128v8si:
+ case CODE_FOR_avx_vinsertf128v4df:
+ case CODE_FOR_avx_vinsertf128v8sf:
+ case CODE_FOR_avx_vinsertf128v8si:
+ error ("the last argument must be a 1-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_cmpsdv2df3:
+ case CODE_FOR_avx_cmpssv4sf3:
+ case CODE_FOR_avx_cmppdv2df3:
+ case CODE_FOR_avx_cmppsv4sf3:
+ case CODE_FOR_avx_cmppdv4df3:
+ case CODE_FOR_avx_cmppsv8sf3:
+ error ("the last argument must be a 5-bit immediate");
+ return const0_rtx;
+
default:
switch (nargs_constant)
{
emit_insn (GEN_FCN (icode) (target));
return 0;
case V2DI_FTYPE_PV2DI:
+ case V32QI_FTYPE_PCCHAR:
case V16QI_FTYPE_PCCHAR:
+ case V8SF_FTYPE_PCV4SF:
+ case V8SF_FTYPE_PCFLOAT:
case V4SF_FTYPE_PCFLOAT:
+ case V4DF_FTYPE_PCV2DF:
+ case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
nargs = 1;
klass = load;
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V8SF:
case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
case VOID_FTYPE_PDI_DI:
case VOID_FTYPE_PINT_INT:
klass = load;
memory = 1;
break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ nargs = 2;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ nargs = 2;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
default:
gcc_unreachable ();
}
current ISA based on the command line switches. With function specific
options, we need to check in the context of the function making the call
whether it is supported. */
- if (ix86_builtins_isa[fcode]
- && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
+ if (ix86_builtins_isa[fcode].isa
+ && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
{
- char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
+ char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
NULL, NULL, false);
if (!opts)
{
if (!in)
return ix86_cost->int_store[0];
- if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
+ if (TARGET_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun))
cost = ix86_cost->movzbl_load;
else
cost = ix86_cost->int_load[0];
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. */
- return (VALID_SSE_REG_MODE (mode)
+ are available. OImode move is available only when AVX is
+ enabled. */
+ return ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)
|| VALID_MMX_REG_MODE_3DNOW (mode));
scanned. In either case, *TOTAL contains the cost result. */
static bool
-ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
switch (code)
{
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
- *total = ix86_cost->add;
+ *total = cost->add;
else
- *total = ix86_cost->movzx;
+ *total = cost->movzx;
return false;
case SIGN_EXTEND:
- *total = ix86_cost->movsx;
+ *total = cost->movsx;
return false;
case ASHIFT:
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
}
if ((value == 2 || value == 3)
- && ix86_cost->lea <= ix86_cost->shift_const)
+ && cost->lea <= cost->shift_const)
{
- *total = ix86_cost->lea;
+ *total = cost->lea;
return false;
}
}
if (CONST_INT_P (XEXP (x, 1)))
{
if (INTVAL (XEXP (x, 1)) > 32)
- *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+ *total = cost->shift_const + COSTS_N_INSNS (2);
else
- *total = ix86_cost->shift_const * 2;
+ *total = cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
- *total = ix86_cost->shift_var * 2;
+ *total = cost->shift_var * 2;
else
- *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+ *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (CONST_INT_P (XEXP (x, 1)))
- *total = ix86_cost->shift_const;
+ *total = cost->shift_const;
else
- *total = ix86_cost->shift_var;
+ *total = cost->shift_var;
}
return false;
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE scalar cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
- *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
- + nbits * ix86_cost->mult_bit
- + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
+ *total = (cost->mult_init[MODE_INDEX (mode)]
+ + nbits * cost->mult_bit
+ + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
return true;
}
case UMOD:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else
- *total = ix86_cost->divide[MODE_INDEX (mode)];
+ *total = cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
/* FALLTHRU */
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
- *total = (ix86_cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code)
+ *total = (cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code)
+ + (rtx_cost (XEXP (x, 1), outer_code, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
- *total = ix86_cost->add * 2;
+ *total = cost->add * 2;
else
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
case COMPARE:
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
- *total = (ix86_cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
- + rtx_cost (const1_rtx, outer_code));
+ *total = (cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (const1_rtx, outer_code, speed));
return true;
}
return false;
case ABS:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
return false;
case SQRT:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
return false;
case UNSPEC:
static void
ix86_reorg (void)
{
- if (TARGET_PAD_RETURNS && optimize && !optimize_size)
+ if (TARGET_PAD_RETURNS && optimize
+ && optimize_function_for_speed_p (cfun))
ix86_pad_returns ();
- if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
+ if (TARGET_FOUR_JUMP_LIMIT && optimize
+ && optimize_function_for_speed_p (cfun))
ix86_avoid_jump_misspredicts ();
}
bool
x86_extended_reg_mentioned_p (rtx insn)
{
- return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
+ return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
+ extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
rtx target, rtx val)
{
- enum machine_mode smode, wsmode, wvmode;
+ enum machine_mode hmode, smode, wsmode, wvmode;
rtx x;
switch (mode)
emit_move_insn (target, gen_lowpart (mode, x));
return true;
+ case V4DFmode:
+ hmode = V2DFmode;
+ goto half;
+ case V4DImode:
+ hmode = V2DImode;
+ goto half;
+ case V8SFmode:
+ hmode = V4SFmode;
+ goto half;
+ case V8SImode:
+ hmode = V4SImode;
+ goto half;
+ case V16HImode:
+ hmode = V8HImode;
+ goto half;
+ case V32QImode:
+ hmode = V16QImode;
+ goto half;
+half:
+ {
+ rtx tmp = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
+ }
+ return true;
+
default:
return false;
}
case V4HImode:
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
break;
+ case V32QImode:
+ case V16HImode:
+ case V8SImode:
+ case V8SFmode:
+ case V4DImode:
+ case V4DFmode:
+ use_vector_set = TARGET_AVX;
+ break;
default:
break;
}
the general case. */
return false;
+ case V4DFmode:
+ case V4DImode:
+ case V8SFmode:
+ case V8SImode:
+ case V16HImode:
+ case V32QImode:
case V4SFmode:
case V4SImode:
case V8HImode:
rtx target, rtx *ops, int n)
{
enum machine_mode cmode, hmode = VOIDmode;
- rtx first[4], second[2];
+ rtx first[8], second[4];
rtvec v;
int i, j;
case 2:
switch (mode)
{
+ case V8SImode:
+ cmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V4SFmode;
+ break;
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
case V4SImode:
cmode = V2SImode;
break;
case 4:
switch (mode)
{
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
case V4SImode:
cmode = V2SImode;
break;
}
goto half;
+ case 8:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
ix86_expand_vector_init_interleave (enum machine_mode mode,
rtx target, rtx *ops, int n)
{
- enum machine_mode first_imode, second_imode, third_imode;
+ enum machine_mode first_imode, second_imode, third_imode, inner_mode;
int i, j;
rtx op0, op1;
rtx (*gen_load_even) (rtx, rtx, rtx);
gen_load_even = gen_vec_setv8hi;
gen_interleave_first_low = gen_vec_interleave_lowv4si;
gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ inner_mode = HImode;
first_imode = V4SImode;
second_imode = V2DImode;
third_imode = VOIDmode;
gen_load_even = gen_vec_setv16qi;
gen_interleave_first_low = gen_vec_interleave_lowv8hi;
gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ inner_mode = QImode;
first_imode = V8HImode;
second_imode = V4SImode;
third_imode = V2DImode;
emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even elements into the second positon. */
- emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+ emit_insn ((*gen_load_even) (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
const1_rtx));
/* Cast vector to FIRST_IMODE vector. */
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[16];
+ rtx ops[32], op0, op1;
+ enum machine_mode half_mode = VOIDmode;
int n, i;
switch (mode)
break;
/* FALLTHRU */
+ case V8SFmode:
+ case V8SImode:
+ case V4DFmode:
+ case V4DImode:
case V4SFmode:
case V4SImode:
case V2DFmode:
ix86_expand_vector_init_concat (mode, target, ops, n);
return;
+ case V32QImode:
+ half_mode = V16QImode;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ goto half;
+
+half:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (half_mode);
+ op1 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (half_mode, op0, ops,
+ n >> 2);
+ ix86_expand_vector_init_interleave (half_mode, op1,
+ &ops [n >> 1], n >> 2);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
if (!TARGET_SSE2)
break;
+ /* Don't use ix86_expand_vector_init_interleave if we can't
+ move from GPR to SSE register directly. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ break;
+
n = GET_MODE_NUNITS (mode);
for (i = 0; i < n; i++)
ops[i] = XVECEXP (vals, 0, i);
{
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ enum machine_mode half_mode;
bool use_vec_merge = false;
rtx tmp;
+ static rtx (*gen_extract[6][2]) (rtx, rtx)
+ = {
+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+ };
+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+ = {
+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+ };
+ int i, j, n;
switch (mode)
{
break;
case V8QImode:
+ break;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ j = 0;
+ n = 16;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ j = 1;
+ n = 8;
+ goto half;
+
+ case V8SImode:
+ half_mode = V4SImode;
+ j = 2;
+ n = 4;
+ goto half;
+
+ case V4DImode:
+ half_mode = V2DImode;
+ j = 3;
+ n = 2;
+ goto half;
+
+ case V8SFmode:
+ half_mode = V4SFmode;
+ j = 4;
+ n = 4;
+ goto half;
+
+ case V4DFmode:
+ half_mode = V2DFmode;
+ j = 5;
+ n = 2;
+ goto half;
+
+half:
+ /* Compute offset. */
+ i = elt / n;
+ elt %= n;
+
+ gcc_assert (i <= 1);
+
+ /* Extract the half. */
+ tmp = gen_reg_rtx (half_mode);
+ emit_insn ((*gen_extract[j][i]) (tmp, target));
+
+ /* Put val in tmp at elt. */
+ ix86_expand_vector_set (false, tmp, val, elt);
+
+ /* Put it back. */
+ emit_insn ((*gen_insert[j][i]) (target, target, tmp));
+ return;
+
default:
break;
}
return true;
if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
return true;
+ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ return true;
if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
-#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
+#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save
#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
-#undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
-#define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
-
-#undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
-#define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
-
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"