#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
+#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
1, /* cond_not_taken_branch_cost. */
};
-struct processor_costs bdver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 64, /* size of l1 cache. */
- 1024, /* size of l2 cache. */
- 64, /* size of prefetch block */
- /* New AMD processors never drop prefetches; if they cannot be performed
- immediately, they are queued. We set number of simultaneous prefetches
- to a large constant to reflect this (it probably is not a good idea not
- to limit number of prefetches at all, as their execution also takes some
- time). */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-
- /* BDVER1 has optimized REP instruction for medium sized blocks, but for
- very small blocks it is better to use loop. For large blocks, libcall can
- do nontemporary accesses and beat inline considerably. */
- {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
- {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- {{libcall, {{8, loop}, {24, unrolled_loop},
- {2048, rep_prefix_4_byte}, {-1, libcall}}},
- {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
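+/* Note on units (a reference sketch, not specific to any one table):
+   the cost entries in these tables are expressed via COSTS_N_INSNS,
+   which rtl.h defines as
+
+     #define COSTS_N_INSNS(N) ((N) * 4)
+
+   so an entry such as COSTS_N_INSNS (19) for a QImode divide rates the
+   divide at roughly nineteen simple instructions when rtx costs are
+   compared.  */
+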
static const
struct processor_costs pentium4_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_BDVER1 (1<<PROCESSOR_BDVER1)
-#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
+#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
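+/* Sketch of how these masks are consumed (mirroring the option handling
+   elsewhere in this file): the mask of the CPU selected by -mtune is
+   intersected with each feature's processor mask, e.g.
+
+     ix86_tune_mask = 1u << ix86_tune;
+     for (i = 0; i < X86_TUNE_LAST; ++i)
+       ix86_tune_features[i]
+	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */
+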
~m_386,
/* X86_TUNE_USE_SAHF */
- m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
- | m_AMDFAM10 | m_BDVER1,
-
- /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_AMDFAM10 | m_BDVER1,
+ | m_AMDFAM10,
- /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_BDVER1,
-
- /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
- m_BDVER1,
+ /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
+ m_AMDFAM10,
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
~(m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
- ~(m_AMDFAM10 | m_BDVER1),
+ ~(m_AMDFAM10),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
vector path on AMD machines. */
- m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
+ m_K8 | m_GENERIC64 | m_AMDFAM10,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
machines. */
- m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
+ m_K8 | m_GENERIC64 | m_AMDFAM10,
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than a MOV. */
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
- m_CORE2 | m_BDVER1,
+ m_CORE2,
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
\f
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
-static bool ix86_function_value_regno_p (const unsigned int);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
{&generic32_cost, 16, 7, 16, 7, 16},
{&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32},
- {&bdver1_cost, 32, 24, 32, 7, 32},
{&atom_cost, 16, 7, 16, 7, 16}
};
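+/* Columns after the cost table: loop alignment, loop max skip, jump
+   alignment, jump max skip, and function alignment (see struct ptt).  */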
"athlon",
"athlon-4",
"k8",
- "amdfam10",
- "bdver1"
+ "amdfam10"
};
\f
/* Implement TARGET_HANDLE_OPTION. */
if (isa && add_nl_p)
{
opts[num++][0] = isa_other;
- sprintf (isa_other, "(other isa: %#x)", isa);
+ sprintf (isa_other, "(other isa: 0x%x)", isa);
}
/* Add flag options. */
if (flags && add_nl_p)
{
opts[num++][0] = target_other;
- sprintf (target_other, "(other flags: %#x)", flags);
+      sprintf (target_other, "(other flags: 0x%x)", flags);
}
/* Add -fpmath= option. */
{"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
- {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
- PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
- | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
- | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
flag_schedule_insns = 0;
#endif
- /* For -O2 and beyond, turn on -fzee for x86_64 target. */
- if (level > 1 && TARGET_64BIT)
- flag_zee = 1;
-
if (TARGET_MACHO)
/* The Darwin libraries never set errno, so we might as well
avoid calling them when that's the only reason we would. */
return true;
}
-/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
- and "sseregparm" calling convention attributes;
+/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
+ calling convention attributes;
arguments as in struct attribute_spec.handler. */
static tree
error ("fastcall and regparm attributes are not compatible");
}
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
- {
- error ("regparam and thiscall attributes are not compatible");
- }
-
cst = TREE_VALUE (args);
if (TREE_CODE (cst) != INTEGER_CST)
{
if (TARGET_64BIT)
{
/* Do not warn when emulating the MS ABI. */
- if ((TREE_CODE (*node) != FUNCTION_TYPE
- && TREE_CODE (*node) != METHOD_TYPE)
+ if (TREE_CODE (*node) != FUNCTION_TYPE
|| ix86_function_type_abi (*node) != MS_ABI)
warning (OPT_Wattributes, "%qE attribute ignored",
name);
{
error ("fastcall and regparm attributes are not compatible");
}
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and thiscall attributes are not compatible");
- }
}
/* Can combine stdcall with fastcall (redundant), regparm and
{
error ("stdcall and fastcall attributes are not compatible");
}
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
- {
- error ("stdcall and thiscall attributes are not compatible");
- }
}
/* Can combine cdecl with regparm and sseregparm. */
{
error ("fastcall and cdecl attributes are not compatible");
}
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
- {
- error ("cdecl and thiscall attributes are not compatible");
- }
- }
- else if (is_attribute_p ("thiscall", name))
- {
- if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
- warning (OPT_Wattributes, "%qE attribute is used for none class-method",
- name);
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("stdcall and thiscall attributes are not compatible");
- }
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and thiscall attributes are not compatible");
- }
- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
- {
- error ("cdecl and thiscall attributes are not compatible");
- }
}
/* Can combine sseregparm with all attributes. */
!= !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
return 0;
- /* Check for mismatched thiscall types. */
- if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
- return 0;
-
/* Check for mismatched return types (cdecl vs stdcall). */
if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
!= !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
return 2;
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
- return 1;
-
/* Use register calling convention for local functions when possible. */
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
/* Stdcall and fastcall functions will pop the stack if not
variable args. */
if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
if (rtd && ! stdarg_p (funtype))
else look for regparm information. */
if (fntype)
{
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
- {
- cum->nregs = 1;
- cum->fastcall = 1; /* Same first register as in fastcall. */
- }
- else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
{
cum->nregs = 2;
cum->fastcall = 1;
/* Return true if N is a possible register number of function value. */
-static bool
-ix86_function_value_regno_p (const unsigned int regno)
+bool
+ix86_function_value_regno_p (int regno)
{
switch (regno)
{
{
rtx save_area, mem;
rtx label;
+ rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
SSE saves. We need some preparation work to get this working. */
label = gen_label_rtx ();
+ label_ref = gen_rtx_LABEL_REF (Pmode, label);
+  /* Compute the address to jump to:
+     label - eax*4 + nnamed_sse_arguments*4, or
+     label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
+ tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_MULT (Pmode, nsse_reg,
+ GEN_INT (4))));
+
+ /* vmovaps is one byte longer than movaps. */
+ if (TARGET_AVX)
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_PLUS (Pmode, tmp_reg,
+ nsse_reg)));
+
+ if (cum->sse_regno)
+ emit_move_insn
+ (nsse_reg,
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ label_ref,
+ GEN_INT (cum->sse_regno
+ * (TARGET_AVX ? 5 : 4)))));
+ else
+ emit_move_insn (nsse_reg, label_ref);
+ emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
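+  /* nsse_reg now holds the computed entry point into the save
+     sequence: %eax, which the caller of a varargs function sets to
+     the number of vector registers actually used, lets the jump skip
+     the SSE register saves that are not needed.  */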
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
- set_mem_align (mem, 64);
+ set_mem_align (mem, BITS_PER_WORD);
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label,
- gen_reg_rtx (Pmode)));
+ GEN_INT (cum->sse_regno), label));
}
}
int indirect_p = 0;
tree ptrtype;
enum machine_mode nat_mode;
- unsigned int arg_boundary;
+ int arg_boundary;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
- if (crtl->stack_alignment_needed < arg_boundary)
- crtl->stack_alignment_needed = arg_boundary;
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
case MODE_V4SF:
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
- else
- return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
- else
- return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
case MODE_V8SF:
return "vxorps\t%x0, %x0, %x0";
case MODE_V4DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
- return "vxorpd\t%x0, %x0, %x0";
+ return "vxorpd\t%x0, %x0, %x0";
case MODE_OI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
- return "vpxor\t%x0, %x0, %x0";
+ return "vpxor\t%x0, %x0, %x0";
default:
break;
}
assemble_name (asm_out_file, name);
fputs ("\n", asm_out_file);
ASM_OUTPUT_LABEL (asm_out_file, name);
- DECL_WEAK (decl) = 1;
}
else
#endif
DECL_INITIAL (decl) = make_node (BLOCK);
current_function_decl = decl;
init_function_start (decl);
- first_function_block_is_cold = false;
/* Make sure unwind info is emitted for the thunk if needed. */
final_start_function (emit_barrier (), asm_out_file, 1);
&& cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
{
int count = frame->nregs;
- struct cgraph_node *node = cgraph_node (current_function_decl);
cfun->machine->use_fast_prologue_epilogue_nregs = count;
/* The fast prologue uses move instead of push to save registers. This
slow to use many of them. */
if (count)
count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
- if (node->frequency < NODE_FREQUENCY_NORMAL
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
|| (flag_branch_probabilities
- && node->frequency < NODE_FREQUENCY_HOT))
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
cfun->machine->use_fast_prologue_epilogue = false;
else
cfun->machine->use_fast_prologue_epilogue
passing. */
if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
&& !lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (decl)))
- && !lookup_attribute ("thiscall",
TYPE_ATTRIBUTES (TREE_TYPE (decl))))
return CX_REG;
else
ix86_cfa_state->reg == stack_pointer_rtx);
else
{
+ /* Only valid for Win32. */
rtx eax = gen_rtx_REG (Pmode, AX_REG);
bool eax_live;
rtx t;
+ gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
+
if (cfun->machine->call_abi == MS_ABI)
eax_live = false;
else
rtx base_reg, index_reg;
HOST_WIDE_INT scale = 1;
rtx scale_rtx = NULL_RTX;
- rtx tmp;
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
scale_rtx = XEXP (op, 1);
break;
- case ASHIFT:
- if (index)
- return 0;
- index = XEXP (op, 0);
- tmp = XEXP (op, 1);
- if (!CONST_INT_P (tmp))
- return 0;
- scale = INTVAL (tmp);
- if ((unsigned HOST_WIDE_INT) scale > 3)
- return 0;
- scale = 1 << scale;
- break;
-
case UNSPEC:
if (XINT (op, 1) == UNSPEC_TP
&& TARGET_TLS_DIRECT_SEG_REFS
}
else if (GET_CODE (addr) == ASHIFT)
{
+ rtx tmp;
+
/* We're called for lea too, which implements ashift on occasion. */
index = XEXP (addr, 0);
tmp = XEXP (addr, 1);
|| XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
|| !MEM_P (orig_x))
return orig_x;
- x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != Pmode)
- return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
- return x;
+ return XVECEXP (XEXP (x, 0), 0, 0);
}
if (GET_CODE (x) != PLUS
else
return orig_x;
}
- if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
- return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
return result;
}
return cfun->machine->some_ld_name;
for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (NONDEBUG_INSN_P (insn)
+ if (INSN_P (insn)
&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
return cfun->machine->some_ld_name;
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
+ E,e -- likewise, but for compare-and-branch fused insn.
F,f -- likewise, but for floating-point.
O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
otherwise nothing
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
return;
+ case 'E':
+ put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
+ return;
+
+ case 'e':
+ put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
+ return;
+
case 'H':
/* It doesn't actually matter what mode we use here, as we're
only going to use this for printing. */
return;
case ';':
-#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX
- fputs (";", file);
+#if TARGET_MACHO
+ fputs (" ; ", file);
+#else
+ putc (' ', file);
#endif
return;
switch (GET_MODE_SIZE (mode))
{
case 16:
- /* If we're optimizing for size, movups is the smallest. */
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_avx_movups (op0, op1));
- return;
- }
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
emit_insn (gen_avx_movdqu (op0, op1));
emit_insn (gen_avx_movups256 (op0, op1));
break;
case V2DFmode:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_avx_movups (op0, op1));
- return;
- }
emit_insn (gen_avx_movupd (op0, op1));
break;
case V4DFmode:
if (MEM_P (op1))
{
/* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (optimize_insn_for_size_p ())
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
{
rtx zero;
- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
- {
- op0 = gen_lowpart (V2DFmode, op0);
- op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
- return;
- }
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V2DFmode, op0);
+ op1 = gen_lowpart (V2DFmode, op1);
+ emit_insn (gen_sse2_movupd (op0, op1));
+ return;
+ }
/* When SSE registers are split into halves, we can avoid
writing to the top half twice. */
}
else
{
- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- return;
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
}
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
else if (MEM_P (op0))
{
/* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (optimize_insn_for_size_p ())
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
if (TARGET_SSE2 && mode == V2DFmode)
{
- if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
- {
- op0 = gen_lowpart (V2DFmode, op0);
- op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
- }
- else
- {
- m = adjust_address (op0, DFmode, 0);
- emit_insn (gen_sse2_storelpd (m, op1));
- m = adjust_address (op0, DFmode, 8);
- emit_insn (gen_sse2_storehpd (m, op1));
- }
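+	  /* Write the two halves with movlpd/movhpd; splitting the
+	     store sidesteps the historically slow unaligned movupd.  */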
+ m = adjust_address (op0, DFmode, 0);
+ emit_insn (gen_sse2_storelpd (m, op1));
+ m = adjust_address (op0, DFmode, 8);
+ emit_insn (gen_sse2_storehpd (m, op1));
}
else
{
if (mode != V4SFmode)
op1 = gen_lowpart (V4SFmode, op1);
-
- if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
- }
- else
- {
- m = adjust_address (op0, V2SFmode, 0);
- emit_insn (gen_sse_storelps (m, op1));
- m = adjust_address (op0, V2SFmode, 8);
- emit_insn (gen_sse_storehps (m, op1));
- }
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
}
}
else
rtx prev = PREV_INSN (insn);
while (prev && distance < LEA_SEARCH_THRESHOLD)
{
- if (NONDEBUG_INSN_P (prev))
+ if (INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
&& prev != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (NONDEBUG_INSN_P (prev))
+ if (INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
rtx next = NEXT_INSN (insn);
while (next && distance < LEA_SEARCH_THRESHOLD)
{
- if (NONDEBUG_INSN_P (next))
+ if (INSN_P (next))
{
distance++;
&& next != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (NONDEBUG_INSN_P (next))
+ if (INSN_P (next))
{
distance++;
: gen_x86_64_shld) (high[0], low[0], operands[2]));
}
- emit_insn ((mode == DImode
- ? gen_ashlsi3
- : gen_ashldi3) (low[0], low[0], operands[2]));
+  emit_insn ((mode == DImode
+	      ? gen_ashlsi3
+	      : gen_ashldi3) (low[0], low[0], operands[2]));
if (TARGET_CMOVE && scratch)
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_1
- : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
- scratch));
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_2
- : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
+ ? gen_x86_shift_adj_2
+ : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
}
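+/* (ix86_split_ashl above splits a double-word left shift: x86_shld
+   moves the upper bits, an ordinary shift handles the low word, and
+   the x86_shift_adj sequences repair the result when the variable
+   count turns out to be >= the word width, via cmov when a scratch
+   register is available, otherwise via a branch.)  */
+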
void
: gen_ashrdi3) (scratch, scratch,
GEN_INT (single_width - 1)));
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_1
- : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_3
- : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
+ ? gen_x86_shift_adj_3
+ : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
}
}
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_1
- : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shiftsi_adj_2
- : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
+ ? gen_x86_shift_adj_2
+ : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
}
}
case PROCESSOR_NOCONA:
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
- case PROCESSOR_BDVER1:
return 3;
case PROCESSOR_CORE2:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
- case PROCESSOR_BDVER1:
case PROCESSOR_ATOM:
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
}
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
- to 16byte boundary. Exact wording is:
-
- An array uses the same alignment as its elements, except that a local or
- global array variable of length at least 16 bytes or
- a C99 variable-length array variable always has alignment of at least 16 bytes.
-
- This was added to allow use of aligned SSE instructions at arrays. This
- rule is meant for static storage (where compiler can not do the analysis
- by itself). We follow it for automatic variables only when convenient.
- We fully control everything in the function compiled and functions from
- other unit can not rely on the alignment.
-
- Exclude va_list type. It is the common case of local array where
- we can not benefit from the alignment. */
- if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
- && TARGET_SSE)
+     to a 16-byte boundary.  */
+ if (TARGET_64BIT)
{
if (AGGREGATE_TYPE_P (type)
- && (TYPE_MAIN_VARIANT (type)
- != TYPE_MAIN_VARIANT (va_list_type_node))
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
us with EAX for the static chain. */
regno = AX_REG;
}
- else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
- {
- /* Thiscall functions use ecx for arguments, which leaves
- us with EAX for the static chain. */
- regno = AX_REG;
- }
else if (ix86_function_regparm (fntype, fndecl) == 3)
{
/* For regparm 3, we have no free call-clobbered registers in
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
nargs = 3;
nargs_constant = 2;
break;
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case MULTI_ARG_4_DF2_DI_I:
+ case MULTI_ARG_4_DF2_DI_I1:
+ case MULTI_ARG_4_SF2_SI_I:
+ case MULTI_ARG_4_SF2_SI_I1:
nargs = 4;
nargs_constant = 1;
break;
if it is not available. */
static tree
-ix86_builtin_vectorized_function (tree fndecl, tree type_out,
+ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
tree type_in)
{
enum machine_mode in_mode, out_mode;
int in_n, out_n;
- enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
if (TREE_CODE (type_out) != VECTOR_TYPE
- || TREE_CODE (type_in) != VECTOR_TYPE
- || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+ || TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out));
/* Returns a decl of a function that implements conversion of an integer vector
- into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
- are the types involved when converting according to CODE.
+ into a floating-point vector, or vice-versa. TYPE is the type of the integer
+ side of the conversion.
Return NULL_TREE if it is not available. */
static tree
-ix86_vectorize_builtin_conversion (unsigned int code,
- tree dest_type, tree src_type)
+ix86_vectorize_builtin_conversion (unsigned int code, tree type)
{
- if (! TARGET_SSE2)
+ if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
return NULL_TREE;
switch (code)
{
case FLOAT_EXPR:
- switch (TYPE_MODE (src_type))
+ switch (TYPE_MODE (type))
{
case V4SImode:
- switch (TYPE_MODE (dest_type))
- {
- case V4SFmode:
- return (TYPE_UNSIGNED (src_type)
- ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
- case V4DFmode:
- return (TYPE_UNSIGNED (src_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
- default:
- return NULL_TREE;
- }
- break;
- case V8SImode:
- switch (TYPE_MODE (dest_type))
- {
- case V8SFmode:
- return (TYPE_UNSIGNED (src_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
- default:
- return NULL_TREE;
- }
- break;
+ return TYPE_UNSIGNED (type)
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
default:
return NULL_TREE;
}
case FIX_TRUNC_EXPR:
- switch (TYPE_MODE (dest_type))
+ switch (TYPE_MODE (type))
{
case V4SImode:
- switch (TYPE_MODE (src_type))
- {
- case V4SFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
- case V4DFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
- default:
- return NULL_TREE;
- }
- break;
-
- case V8SImode:
- switch (TYPE_MODE (src_type))
- {
- case V8SFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
- default:
- return NULL_TREE;
- }
- break;
-
+ return TYPE_UNSIGNED (type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
default:
return NULL_TREE;
}
-
default:
return NULL_TREE;
- }
- return NULL_TREE;
+ }
}
/* Returns a code for a target-specific builtin that implements
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
regno = aggr ? DX_REG : CX_REG;
- else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
- {
- regno = CX_REG;
- if (aggr)
- return gen_rtx_MEM (SImode,
- plus_constant (stack_pointer_rtx, 4));
- }
else
{
regno = AX_REG;
/* Adjust the this parameter by a fixed constant. */
if (delta)
{
- xops[0] = GEN_INT (delta);
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
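+      /* (x86 sign-extends 8-bit immediates, so -128..127 fit in one
+	 byte: "subl $-128, %eax" encodes shorter than the equivalent
+	 "addl $128, %eax".)  */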
+ bool sub = delta < 0 || delta == 128;
+ xops[0] = GEN_INT (sub ? -delta : delta);
xops[1] = this_reg ? this_reg : this_param;
if (TARGET_64BIT)
{
xops[0] = tmp;
xops[1] = this_param;
}
- if (x86_maybe_negate_const_int (&xops[0], DImode))
+ if (sub)
output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
else
output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
}
- else if (x86_maybe_negate_const_int (&xops[0], SImode))
+ else if (sub)
output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
else
output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
{
int tmp_regno = CX_REG;
if (lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (function)))
- || lookup_attribute ("thiscall",
- TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
tmp_regno = AX_REG;
tmp = gen_rtx_REG (SImode, tmp_regno);
}
replace = true;
/* Empty functions get branch mispredict even when the jump destination
is not visible to us. */
- if (!prev && !optimize_function_for_size_p (cfun))
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
replace = true;
}
if (replace)
extended_reg_mentioned_1, NULL);
}
-/* If profitable, negate (without causing overflow) integer constant
- of mode MODE at location LOC. Return true in this case. */
-bool
-x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
-{
- HOST_WIDE_INT val;
-
- if (!CONST_INT_P (*loc))
- return false;
-
- switch (mode)
- {
- case DImode:
- /* DImode x86_64 constants must fit in 32 bits. */
- gcc_assert (x86_64_immediate_operand (*loc, mode));
-
- mode = SImode;
- break;
-
- case SImode:
- case HImode:
- case QImode:
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Avoid overflows. */
- if (mode_signbit_p (mode, *loc))
- return false;
-
- val = INTVAL (*loc);
-
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if ((val < 0 && val != -128)
- || val == 128)
- {
- *loc = GEN_INT (-val);
- return true;
- }
-
- return false;
-}
-
/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
optabs would emit if we didn't have TFmode patterns. */
/* Fastcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* Thiscall attribute says callee is responsible for popping arguments
- if they are not variable. */
- { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Cdecl attribute says the callee is a normal C declaration */
{ "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Regparm attribute specifies how many integer arguments are to be
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
-#undef TARGET_FUNCTION_VALUE_REGNO_P
-#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
-
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
@c Copyright (C) 1988,1989,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,
-@c 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+@c 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@c Free Software Foundation, Inc.
@c This is part of the GCC manual.
@c For copying conditions, see the file gcc.texi.
@defmac MD_EXEC_PREFIX
If defined, this macro is an additional prefix to try after
@code{STANDARD_EXEC_PREFIX}. @code{MD_EXEC_PREFIX} is not searched
-when the compiler is built as a cross
+when the @option{-b} option is used, or the compiler is built as a cross
compiler. If you define @code{MD_EXEC_PREFIX}, then be sure to add it
to the list of directories used to find the assembler in @file{configure.in}.
@end defmac
@defmac MD_STARTFILE_PREFIX
If defined, this macro supplies an additional prefix to try after the
standard prefixes. @code{MD_EXEC_PREFIX} is not searched when the
-compiler is built as a cross compiler.
+@option{-b} option is used, or when the compiler is built as a cross
+compiler.
@end defmac
@defmac MD_STARTFILE_PREFIX_1
If defined, this macro supplies yet another prefix to try after the
-standard prefixes. It is not searched when the compiler is built as a
-cross compiler.
+standard prefixes. It is not searched when the @option{-b} option is
+used, or when the compiler is built as a cross compiler.
@end defmac
@defmac INIT_ENVIRONMENT
the highest numbered allocable register first.
@end defmac
-@defmac ADJUST_REG_ALLOC_ORDER
+@defmac ORDER_REGS_FOR_LOCAL_ALLOC
A C statement (sans semicolon) to choose the order in which to allocate
hard registers for pseudo-registers local to a basic block.
On most machines, it is not necessary to define this macro.
@end defmac
-@defmac HONOR_REG_ALLOC_ORDER
-Normally, IRA tries to estimate the costs for saving a register in the
-prologue and restoring it in the epilogue. This discourages it from
-using call-saved registers. If a machine wants to ensure that IRA
-allocates registers in the order given by REG_ALLOC_ORDER even if some
-call-saved registers appear earlier than call-used ones, this macro
-should be defined.
-@end defmac
-
@defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno})
In some case register allocation order is not enough for the
Integrated Register Allocator (@acronym{IRA}) to generate a good code.
is @code{BITS_PER_WORD} bits wide is correct for your machine.
@end defmac
+@defmac SMALL_REGISTER_CLASSES
+On some machines, it is risky to let hard registers live across arbitrary
+insns. Typically, these machines have instructions that require values
+to be in specific registers (like an accumulator), and reload will fail
+if the required hard register is used for another purpose across such an
+insn.
+
+Define @code{SMALL_REGISTER_CLASSES} to be an expression with a nonzero
+value on these machines. When this macro has a nonzero value, the
+compiler will try to minimize the lifetime of hard registers.
+
+It is always safe to define this macro with a nonzero value, but if you
+unnecessarily define it, you will reduce the amount of optimizations
+that can be performed in some cases. If you do not define this macro
+with a nonzero value when it is required, the compiler will run out of
+spill registers and print a fatal error message. For most machines, you
+should not define this macro at all.
+@end defmac
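+For instance, a port in this situation can define it unconditionally:
+
+@smallexample
+#define SMALL_REGISTER_CLASSES 1
+@end smallexample
+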
+
@defmac CLASS_LIKELY_SPILLED_P (@var{class})
A C expression whose value is nonzero if pseudos that have been assigned
to registers of class @var{class} would likely be spilled because
must have move patterns for this mode.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P (enum machine_mode @var{mode})
-Define this to return nonzero for machine modes for which the port has
-small register classes. If this target hook returns nonzero for a given
-@var{mode}, the compiler will try to minimize the lifetime of registers
-in @var{mode}. The hook may be called with @code{VOIDmode} as argument.
-In this case, the hook is expected to return nonzero if it returns nonzero
-for any mode.
-
-On some machines, it is risky to let hard registers live across arbitrary
-insns. Typically, these machines have instructions that require values
-to be in specific registers (like an accumulator), and reload will fail
-if the required hard register is used for another purpose across such an
-insn.
-
-Passes before reload do not know which hard registers will be used
-in an instruction, but the machine modes of the registers set or used in
-the instruction are already known. And for some machines, register
-classes are small for, say, integer registers but not for floating point
-registers. For example, the AMD x86-64 architecture requires specific
-registers for the legacy x86 integer instructions, but there are many
-SSE registers for floating point operations. On such targets, a good
-strategy may be to return nonzero from this hook for @code{INTEGRAL_MODE_P}
-machine modes but zero for the SSE register classes.
-
-The default version of this hook retuns false for any mode. It is always
-safe to redefine this hook to return with a nonzero value. But if you
-unnecessarily define it, you will reduce the amount of optimizations
-that can be performed in some cases. If you do not define this hook
-to return a nonzero value when it is required, the compiler will run out
-of spill registers and print a fatal error message.
-@end deftypefn
-
@node Scalar Return
@subsection How Scalar Function Values Are Returned
@cindex return values in registers
If the machine has register windows, so that the caller and the called
function use different registers for the return value, this macro
should recognize only the caller's register numbers.
-
-This macro has been deprecated. Use @code{TARGET_FUNCTION_VALUE_REGNO_P}
-for a new target instead.
@end defmac
-@deftypefn {Target Hook} bool TARGET_FUNCTION_VALUE_REGNO_P (const unsigned int @var{regno})
-A target hook that return @code{true} if @var{regno} is the number of a hard
-register in which the values of called function may come back.
-
-A register whose use for returning values is limited to serving as the
-second of a pair (for a value of type @code{double}, say) need not be
-recognized by this target hook.
-
-If the machine has register windows, so that the caller and the called
-function use different registers for the return value, this target hook
-should recognize only the caller's register numbers.
-
-If this hook is not defined, then FUNCTION_VALUE_REGNO_P will be used.
-@end deftypefn
-
@defmac TARGET_ENUM_VA_LIST (@var{idx}, @var{pname}, @var{ptype})
This target macro is used in function @code{c_common_nodes_and_builtins}
to iterate through the target specific builtin types for va_list. The
address; but often a machine-dependent strategy can generate better code.
@end defmac
-@deftypefn {Target Hook} bool TARGET_MODE_DEPENDENT_ADDRESS_P (const_rtx @var{addr})
-This hook returns @code{true} if memory address @var{addr} can have
-different meanings depending on the machine mode of the memory
-reference it is used for or if the address is valid for some modes
-but not others.
-
-Autoincrement and autodecrement addresses typically have mode-dependent
-effects because the amount of the increment or decrement is the size
-of the operand being addressed. Some machines have other mode-dependent
-addresses. Many RISC machines have no mode-dependent addresses.
-
-You may assume that @var{addr} is a valid address for the machine.
-
-The default version of this hook returns @code{false}.
-@end deftypefn
-
@defmac GO_IF_MODE_DEPENDENT_ADDRESS (@var{addr}, @var{label})
A C statement or compound statement with a conditional @code{goto
@var{label};} executed if memory address @var{x} (an RTX) can have
addresses. Many RISC machines have no mode-dependent addresses.
You may assume that @var{addr} is a valid address for the machine.
-
-These are obsolete macros, replaced by the
-@code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook.
@end defmac
@defmac LEGITIMATE_CONSTANT_P (@var{x})
Return true if a vector created for @code{builtin_vec_perm} is valid.
@end deftypefn
-@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_CONVERSION (unsigned @var{code}, tree @var{dest_type}, tree @var{src_type})
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_CONVERSION (unsigned @var{code}, tree @var{type})
This hook should return the DECL of a function that implements conversion of the
-input vector of type @var{src_type} to type @var{dest_type}.
+input vector of type @var{type}.
+If @var{type} is an integral type, the result of the conversion is a vector of
+floating-point type of the same size.
+If @var{type} is a floating-point type, the result of the conversion is a vector
+of integral type of the same size.
The value of @var{code} is one of the enumerators in @code{enum tree_code} and
specifies how the conversion is to be applied
(truncation, rounding, etc.).
conversion. Otherwise, it will return @code{NULL_TREE}.
@end deftypefn
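+For example, on a target with an instruction like x86's
+@code{cvtdq2ps}, the hook might handle a @code{FLOAT_EXPR} from
+@code{V4SImode} this way (the function and decl names are
+illustrative):
+
+@smallexample
+static tree
+example_builtin_conversion (unsigned code, tree type)
+@{
+  if (code == FLOAT_EXPR && TYPE_MODE (type) == V4SImode)
+    return int_to_float_vector_builtin;
+  return NULL_TREE;
+@}
+@end smallexample
+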
-@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (tree @var{fndecl}, tree @var{vec_type_out}, tree @var{vec_type_in})
-This hook should return the decl of a function that implements the
-vectorized variant of the builtin function with builtin function code
-@var{code} or @code{NULL_TREE} if such a function is not available.
-The value of @var{fndecl} is the builtin function declaration. The
-return type of the vectorized function shall be of vector type
-@var{vec_type_out} and the argument types should be @var{vec_type_in}.
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in})
+This hook should return the decl of a function that implements the vectorized
+variant of the builtin function with builtin function code @var{code} or
+@code{NULL_TREE} if such a function is not available. The value of @var{code}
+is one of the enumerators in @code{enum built_in_function}. The return type of
+the vectorized function shall be of vector type @var{vec_type_out} and the
+argument types should be @var{vec_type_in}.
@end deftypefn
@deftypefn {Target Hook} bool TARGET_SUPPORT_VECTOR_MISALIGNMENT (enum machine_mode @var{mode}, const_tree @var{type}, int @var{misalignment}, bool @var{is_packed})
uninitialized, writable small data.
@end defmac
-@defmac TLS_COMMON_ASM_OP
-If defined, a C expression whose value is a string containing the
-assembler operation to identify the following data as thread-local
-common data. The default is @code{".tls_common"}.
-@end defmac
-
-@defmac TLS_SECTION_ASM_FLAG
-If defined, a C expression whose value is a character constant
-containing the flag used to mark a section as a TLS section. The
-default is @code{'T'}.
-@end defmac
-
@defmac INIT_SECTION_ASM_OP
If defined, a C expression whose value is a string, including spacing,
containing the assembler operation to identify the following data as
this function.
@end deftypefun
-@deftypefn {Target Hook} void TARGET_ASM_LTO_START (void)
-Output to @code{asm_out_file} any text which the assembler expects
-to find at the start of an LTO section. The default is to output
-nothing.
-@end deftypefn
-
-@deftypefn {Target Hook} void TARGET_ASM_LTO_END (void)
-Output to @code{asm_out_file} any text which the assembler expects
-to find at the end of an LTO section. The default is to output
-nothing.
-@end deftypefn
-
@deftypefn {Target Hook} void TARGET_ASM_CODE_END (void)
Output to @code{asm_out_file} any text which is needed before emitting
unwind info and debug info at the end of a file. Some targets emit
take.
@end deftypevr
-@deftypefn {Target Hook} bool TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P (const_tree @var{name})
-If defined, this target hook is a function which returns true if the
-machine-specific attribute named @var{name} expects an identifier
-given as its first argument to be passed on as a plain identifier, not
-subjected to name lookup. If this is not defined, the default is
-false for all machine-specific attributes.
-@end deftypefn
-
@deftypefn {Target Hook} int TARGET_COMP_TYPE_ATTRIBUTES (const_tree @var{type1}, const_tree @var{type2})
If defined, this target hook is a function which returns zero if the attributes on
@var{type1} and @var{type2} are incompatible, one if they are compatible,
@var{arglist} really has type @samp{VEC(tree,gc)*}
@end deftypefn
-@deftypefn {Target Hook} tree TARGET_FOLD_BUILTIN (tree @var{fndecl}, int @var{n_args}, tree *@var{argp}, bool @var{ignore})
+@deftypefn {Target Hook} tree TARGET_FOLD_BUILTIN (tree @var{fndecl}, tree @var{arglist}, bool @var{ignore})
Fold a call to a machine specific built-in function that was set up by
@samp{TARGET_INIT_BUILTINS}. @var{fndecl} is the declaration of the
-built-in function. @var{n_args} is the number of arguments passed to
-the function; the arguments themselves are pointed to by @var{argp}.
-The result is another tree containing a simplified expression for the
-call's result. If @var{ignore} is true the value will be ignored.
+built-in function. @var{arglist} is the list of arguments passed to
+the built-in function. The result is another tree containing a
+simplified expression for the call's result. If @var{ignore} is true
+the value will be ignored.
@end deftypefn
@deftypefn {Target Hook} {const char *} TARGET_INVALID_WITHIN_DOLOOP (const_rtx @var{insn})
modes and they have different conditional execution capability, such as ARM.
@end deftypefn
-@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
-This target hook returns a new value for the number of times @var{loop}
-should be unrolled. The parameter @var{nunroll} is the number of times
-the loop is to be unrolled. The parameter @var{loop} is a pointer to
-the loop, which is going to be checked for unrolling. This target hook
-is required only when the target has special constraints like maximum
-number of memory accesses.
-@end deftypefn
-
@defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications