#include "langhooks.h"
#include "df.h"
#include "intl.h"
+#include "libfuncs.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
-static rtx arm_libcall_value (enum machine_mode, rtx);
+static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
+static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
+static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
+static bool arm_can_eliminate (const int, const int);
+static void arm_asm_trampoline_template (FILE *);
+static void arm_trampoline_init (rtx, tree, rtx);
+static rtx arm_trampoline_adjust_address (rtx);
\f
/* Table of machine attributes. */
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
+
#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
+
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#define TARGET_HAVE_TLS true
#endif
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arm_can_eliminate
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Obstack for minipool constant handling. */
/* The default processor used if not overridden by commandline. */
static enum processor_type arm_default_cpu = arm_none;
-/* Which floating point model to use. */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. */
-enum fputype arm_fpu_arch;
-
/* Which floating point hardware to schedule for. */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware. */
enum float_abi_type arm_float_abi;
#define FL_DIV (1 << 18) /* Hardware divide. */
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
#define FL_NEON (1 << 20) /* Neon instructions. */
+#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
+ architecture. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
instructions we are allowed to generate. */
/* Nonzero if instructions not present in the 'M' profile can be used. */
int arm_arch_notm = 0;
+/* Nonzero if instructions present in ARMv7E-M can be used. */
+int arm_arch7em = 0;
+
/* Nonzero if this chip can benefit from load scheduling. */
int arm_ld_sched = 0;
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
{"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
+ {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
{"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
{"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
{"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
-struct fpu_desc
-{
- const char * name;
- enum fputype fpu;
-};
-
-
/* Available values for -mfpu=. */
-static const struct fpu_desc all_fpus[] =
-{
- {"fpa", FPUTYPE_FPA},
- {"fpe2", FPUTYPE_FPA_EMU2},
- {"fpe3", FPUTYPE_FPA_EMU2},
- {"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP},
- {"vfp3", FPUTYPE_VFP3},
- {"vfpv3", FPUTYPE_VFP3},
- {"vfpv3-d16", FPUTYPE_VFP3D16},
- {"neon", FPUTYPE_NEON},
- {"neon-fp16", FPUTYPE_NEON_FP16}
-};
-
-
-/* Floating point models used by the different hardware.
- See fputype in arm.h. */
-
-static const enum arm_fp_model fp_model_for_fpu[] =
-{
- /* No FP hardware. */
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
default:
break;
}
+
+ if (TARGET_AAPCS_BASED)
+ synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list". */
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
error ("iwmmxt abi requires an iwmmxt capable cpu");
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
if (target_fpu_name == NULL && target_fpe_name != NULL)
{
if (streq (target_fpe_name, "2"))
error ("invalid floating point emulation option: -mfpe=%s",
target_fpe_name);
}
- if (target_fpu_name != NULL)
- {
- /* The user specified a FPU. */
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
- {
- if (streq (all_fpus[i].name, target_fpu_name))
- {
- arm_fpu_arch = all_fpus[i].fpu;
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- break;
- }
- }
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
- }
- else
+
+ if (target_fpu_name == NULL)
{
#ifdef FPUTYPE_DEFAULT
- /* Use the default if it is specified for this platform. */
- arm_fpu_arch = FPUTYPE_DEFAULT;
- arm_fpu_tune = FPUTYPE_DEFAULT;
+ target_fpu_name = FPUTYPE_DEFAULT;
#else
- /* Pick one based on CPU type. */
- /* ??? Some targets assume FPA is the default.
- if ((insn_flags & FL_VFP) != 0)
- arm_fpu_arch = FPUTYPE_VFP;
- else
- */
if (arm_arch_cirrus)
- arm_fpu_arch = FPUTYPE_MAVERICK;
+ target_fpu_name = "maverick";
else
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
+ target_fpu_name = "fpe2";
#endif
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
- arm_fpu_tune = FPUTYPE_FPA;
+ }
+
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+ {
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+
+ if (!arm_fpu_desc)
+ {
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+ return;
+ }
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
else
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable();
}
if (target_float_abi_name != NULL)
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
if (TARGET_AAPCS_BASED
- && (arm_fp_model == ARM_FP_MODEL_FPA))
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
error ("FPA is unsupported in the AAPCS");
if (TARGET_AAPCS_BASED)
/* If soft-float is specified then don't use FPU. */
if (TARGET_SOFT_FLOAT)
- arm_fpu_arch = FPUTYPE_NONE;
+ arm_fpu_attr = FPU_NONE;
if (TARGET_AAPCS_BASED)
{
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+ || (TARGET_FPA && arm_fpu_desc->rev))
&& (tune_flags & FL_MODE32) == 0)
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
/* Use the cp15 method if it is available. */
if (target_thread_pointer == TP_AUTO)
{
- if (arm_arch6k && !TARGET_THUMB)
+ if (arm_arch6k && !TARGET_THUMB1)
target_thread_pointer = TP_CP15;
else
target_thread_pointer = TP_SOFT;
fix_cm3_ldrd = 0;
}
- /* ??? We might want scheduling for thumb2. */
- if (TARGET_THUMB && flag_schedule_insns)
+ if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
flag_schedule_insns = 0;
max_insns_skipped = 3;
}
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
}
\f
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts. The text is
+ written to the stream F.
+
+ On the ARM, (if r8 is the static chain regnum, and remembering that
+ referencing pc adds an offset of 8) the trampoline looks like:
+ ldr r8, [pc, #0]
+ ldr pc, [pc]
+ .word static chain value
+ .word function's address
+ XXX FIXME: When the trampoline returns, r8 will be clobbered.
+
+ For Thumb-2 the same two loads are emitted as "ldr.w" with an
+ offset of #4 instead of #0.
+
+ Otherwise (16-bit Thumb) the block is aligned, switched to
+ ".code 16" and labelled .Ltrampoline_start; the code pushes r0 and
+ r1, loads a word through r0 and moves it into the static chain
+ register, loads a second word into r0 and stores it at [sp, #4],
+ then pops r0 and pc.
+
+ In every case two zero words of UNITS_PER_WORD bytes follow the
+ code. They are the placeholders for the static chain value and the
+ function's address; arm_trampoline_init stores the real values at
+ offsets 8 and 12 (TARGET_32BIT) or 12 and 16 (otherwise). */
+
+static void
+arm_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_ARM)
+ {
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* The Thumb-2 trampoline is similar to the arm implementation.
+ Unlike 16-bit Thumb, we enter the stub in thumb mode. */
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
+ STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else
+ {
+ ASM_OUTPUT_ALIGN (f, 2);
+ fprintf (f, "\t.code\t16\n");
+ fprintf (f, ".Ltrampoline_start:\n");
+ asm_fprintf (f, "\tpush\t{r0, r1}\n");
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
+ asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
+ }
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ M_TRAMP is the MEM for the trampoline block, FNDECL is the
+ declaration whose DECL_RTL supplies the target function's address,
+ and CHAIN_VALUE is the static chain value to store.
+
+ The template returned by assemble_trampoline_template is first
+ block-copied into M_TRAMP (TRAMPOLINE_SIZE bytes). CHAIN_VALUE and
+ the function address are then stored as SImode words at offsets 8
+ and 12 when TARGET_32BIT, or 12 and 16 otherwise. Finally a library
+ call to __clear_cache is emitted for the range from the trampoline's
+ address to that address plus TRAMPOLINE_SIZE. */
+
+static void
+arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr, mem, a_tramp;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ emit_move_insn (mem, fnaddr);
+
+ a_tramp = XEXP (m_tramp, 0);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+ LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
+ plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
+}
+
+/* Thumb trampolines should be entered in thumb mode, so set
+ the bottom bit of the address. ADDR is the trampoline address.
+ When TARGET_THUMB the value returned is the result of expanding
+ ADDR | 1 in Pmode with expand_simple_binop; otherwise ADDR is
+ returned unchanged. */
+
+static rtx
+arm_trampoline_adjust_address (rtx addr)
+{
+ if (TARGET_THUMB)
+ addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
+ NULL, 0, OPTAB_LIB_WIDEN);
+ return addr;
+}
+\f
/* Return 1 if it is possible to return using a single instruction.
If SIBLING is non-null, this is a test for a return before a sibling
call. SIBLING is the call insn, so we can examine its register usage. */
1);
}
-/* Return the number of ARM instructions required to synthesize the given
- constant. */
+/* Return the number of instructions required to synthesize the given
+ constant, if we start emitting them from bit-position I. */
static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
HOST_WIDE_INT temp1;
+ int step_size = TARGET_ARM ? 2 : 1;
int num_insns = 0;
+
+ gcc_assert (TARGET_ARM || i == 0);
+
do
{
int end;
if (i <= 0)
i += 32;
- if (remainder & (3 << (i - 2)))
+ if (remainder & (((1 << step_size) - 1) << (i - step_size)))
{
end = i - 8;
if (end < 0)
| ((i < end) ? (0xff >> (32 - end)) : 0));
remainder &= ~temp1;
num_insns++;
- i -= 6;
+ i -= 8 - step_size;
}
- i -= 2;
+ i -= step_size;
} while (remainder);
return num_insns;
}
+/* Return the bit position from which the insns that synthesize the
+   constant REMAINDER should start being emitted.  Only the low 32 bits
+   of REMAINDER are examined.
+
+   When not targeting ARM the answer is always 0.  For ARM the result is
+   an even bit position: either the low end of the longest run of zero
+   bit-pairs in REMAINDER, or 0 when starting there is not cheaper than
+   starting from 0 (see the long comment in the body).  */
+static int
+find_best_start (unsigned HOST_WIDE_INT remainder)
+{
+  int best_consecutive_zeros = 0;
+  int i;
+  int best_start = 0;
+
+  /* If we aren't targeting ARM, the best place to start is always at
+     the bottom.  */
+  if (! TARGET_ARM)
+    return 0;
+
+  /* Walk the value two bits at a time looking for the longest run of
+     zero bit-pairs.  The two-bit mask is built in unsigned
+     HOST_WIDE_INT: as a plain int, 3 << 30 is not representable and
+     shifting it would be undefined behavior.  */
+  for (i = 0; i < 32; i += 2)
+    {
+      int consecutive_zeros = 0;
+
+      if (!(remainder & ((unsigned HOST_WIDE_INT) 3 << i)))
+        {
+          while ((i < 32)
+                 && !(remainder & ((unsigned HOST_WIDE_INT) 3 << i)))
+            {
+              consecutive_zeros += 2;
+              i += 2;
+            }
+          if (consecutive_zeros > best_consecutive_zeros)
+            {
+              best_consecutive_zeros = consecutive_zeros;
+              best_start = i - consecutive_zeros;
+            }
+          /* Undo the final step so the for-increment resumes just
+             after the run.  */
+          i -= 2;
+        }
+    }
+
+  /* So long as it won't require any more insns to do so, it's
+     desirable to emit a small constant (in bits 0...9) in the last
+     insn.  This way there is more chance that it can be combined with
+     a later addressing insn to form a pre-indexed load or store
+     operation.  Consider:
+
+           *((volatile int *)0xe0000100) = 1;
+           *((volatile int *)0xe0000110) = 2;
+
+     We want this to wind up as:
+
+            mov rA, #0xe0000000
+            mov rB, #1
+            str rB, [rA, #0x100]
+            mov rB, #2
+            str rB, [rA, #0x110]
+
+     rather than having to synthesize both large constants from scratch.
+
+     Therefore, we calculate how many insns would be required to emit
+     the constant starting from `best_start', and also starting from
+     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
+     yield a shorter sequence, we may as well use zero.  */
+  if (best_start != 0
+      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+      && (count_insns_for_constant (remainder, 0) <=
+          count_insns_for_constant (remainder, best_start)))
+    best_start = 0;
+
+  return best_start;
+}
+
/* Emit an instruction with the indicated PATTERN. If COND is
non-NULL, conditionalize the execution of the instruction on COND
being true. */
{
int can_invert = 0;
int can_negate = 0;
+ int final_invert = 0;
int can_negate_initial = 0;
int can_shift = 0;
int i;
int insns = 0;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+ int step_size = TARGET_ARM ? 2 : 1;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
return 1;
}
- /* We don't know how to handle other cases yet. */
- gcc_assert (remainder == 0xffffffff);
-
- if (generate)
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, target,
- gen_rtx_NOT (mode, source)));
- return 1;
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ return 1;
+ }
+ break;
case MINUS:
/* We treat MINUS as (val - source), since (source - val) is always
if ((code == AND)
|| (code != IOR && can_invert && num_bits_set > 16))
- remainder = (~remainder) & 0xffffffff;
+ remainder ^= 0xffffffff;
else if (code == PLUS && num_bits_set > 16)
remainder = (-remainder) & 0xffffffff;
+
+ /* For XOR, if more than half the bits are set and there's a sequence
+ of more than 8 consecutive ones in the pattern then we can XOR by the
+ inverted constant and then invert the final result; this may save an
+ instruction and might also lead to the final mvn being merged with
+ some other operation. */
+ else if (code == XOR && num_bits_set > 16
+ && (count_insns_for_constant (remainder ^ 0xffffffff,
+ find_best_start
+ (remainder ^ 0xffffffff))
+ < count_insns_for_constant (remainder,
+ find_best_start (remainder))))
+ {
+ remainder ^= 0xffffffff;
+ final_invert = 1;
+ }
else
{
can_invert = 0;
/* ??? Use thumb2 replicated constants when the high and low halfwords are
the same. */
{
- int best_start = 0;
- if (!TARGET_THUMB2)
- {
- int best_consecutive_zeros = 0;
-
- for (i = 0; i < 32; i += 2)
- {
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
- {
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
- }
- }
-
- /* So long as it won't require any more insns to do so, it's
- desirable to emit a small constant (in bits 0...9) in the last
- insn. This way there is more chance that it can be combined with
- a later addressing insn to form a pre-indexed load or store
- operation. Consider:
-
- *((volatile int *)0xe0000100) = 1;
- *((volatile int *)0xe0000110) = 2;
-
- We want this to wind up as:
-
- mov rA, #0xe0000000
- mov rB, #1
- str rB, [rA, #0x100]
- mov rB, #2
- str rB, [rA, #0x110]
-
- rather than having to synthesize both large constants from scratch.
-
- Therefore, we calculate how many insns would be required to emit
- the constant starting from `best_start', and also starting from
- zero (i.e. with bit 31 first to be output). If `best_start' doesn't
- yield a shorter sequence, we may as well use zero. */
- if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
- }
-
/* Now start emitting the insns. */
- i = best_start;
+ i = find_best_start (remainder);
do
{
int end;
}
else
{
- if (remainder && subtargets)
+ if ((final_invert || remainder) && subtargets)
new_src = gen_reg_rtx (mode);
else
new_src = target;
code = PLUS;
insns++;
- if (TARGET_ARM)
- i -= 6;
- else
- i -= 7;
+ i -= 8 - step_size;
}
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
shifts. */
- if (TARGET_ARM)
- i -= 2;
- else
- i--;
+ i -= step_size;
}
while (remainder);
}
+ if (final_invert)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ insns++;
+ }
+
return insns;
}
}
static bool
-arm_libcall_uses_aapcs_base (rtx libcall)
+arm_libcall_uses_aapcs_base (const_rtx libcall)
{
static bool init_done = false;
static htab_t libcall_htab;
}
rtx
-arm_libcall_value (enum machine_mode mode, rtx libcall)
+arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
&& GET_MODE_CLASS (mode) == MODE_FLOAT)
{
{"aapcs", ARM_PCS_AAPCS},
{"aapcs-vfp", ARM_PCS_AAPCS_VFP},
+#if 0
+ /* We could recognize these, but changes would be needed elsewhere
+ * to implement them. */
{"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
{"atpcs", ARM_PCS_ATPCS},
{"apcs", ARM_PCS_APCS},
+#endif
{NULL, ARM_PCS_UNKNOWN}
};
return -1;
}
+/* Return true if PCS_VARIANT should use VFP registers.
+ ARM_PCS_AAPCS_VFP always does. ARM_PCS_AAPCS_LOCAL does only when
+ TARGET_32BIT, TARGET_VFP and TARGET_HARD_FLOAT all hold and, if
+ IS_DOUBLE is true, TARGET_VFP_DOUBLE holds as well. Every other
+ variant does not. */
static bool
-aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
- int *base_mode,
- int *count)
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
+ if (pcs_variant == ARM_PCS_AAPCS_VFP)
+ return true;
+
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
+ return false;
+
+ return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
+ (TARGET_VFP_DOUBLE || !is_double));
+}
+
+/* Decide whether a value of mode MODE and type TYPE (TYPE may be null)
+ is a candidate for VFP registers under PCS_VARIANT. On success
+ return true with the element mode stored in *BASE_MODE and the
+ element count in *COUNT:
+ - float and vector modes give one element of MODE itself;
+ - complex float modes give two elements, DFmode for DCmode and
+ SFmode otherwise;
+ - BLKmode values or vector types (with TYPE non-null) use the mode
+ and count from aapcs_vfp_sub_candidate, accepted only for counts
+ from 1 to 4.
+ Anything else, or a PCS_VARIANT that use_vfp_abi rejects for the
+ chosen element mode, returns false. *BASE_MODE is written only on
+ success, but *COUNT may already have been written when false is
+ returned. */
+static bool
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type,
+ enum machine_mode *base_mode, int *count)
+{
+ enum machine_mode new_mode = VOIDmode;
+
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
{
*count = 1;
- *base_mode = mode;
- return true;
+ new_mode = mode;
}
else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
{
*count = 2;
- *base_mode = (mode == DCmode ? DFmode : SFmode);
- return true;
+ new_mode = (mode == DCmode ? DFmode : SFmode);
}
else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
{
- enum machine_mode aggregate_mode = VOIDmode;
- int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
if (ag_count > 0 && ag_count <= 4)
- {
- *count = ag_count;
- *base_mode = aggregate_mode;
- return true;
- }
+ *count = ag_count;
+ else
+ return false;
}
- return false;
+ else
+ return false;
+
+
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
+ return false;
+
+ *base_mode = new_mode;
+ return true;
}
static bool
enum machine_mode mode, const_tree type)
{
int count ATTRIBUTE_UNUSED;
- int ag_mode ATTRIBUTE_UNUSED;
+ enum machine_mode ag_mode ATTRIBUTE_UNUSED;
- if (!(pcs_variant == ARM_PCS_AAPCS_VFP
- || (pcs_variant == ARM_PCS_AAPCS_LOCAL
- && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ if (!use_vfp_abi (pcs_variant, false))
return false;
- return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
}
static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
const_tree type)
{
- if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
- || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
- && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ if (!use_vfp_abi (pcum->pcs_variant, false))
return false;
- return aapcs_vfp_is_call_or_return_candidate (mode, type,
+
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
&pcum->aapcs_vfp_rmode,
&pcum->aapcs_vfp_rcount);
}
enum machine_mode mode,
const_tree type ATTRIBUTE_UNUSED)
{
- if (!(pcs_variant == ARM_PCS_AAPCS_VFP
- || (pcs_variant == ARM_PCS_AAPCS_LOCAL
- && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ if (!use_vfp_abi (pcs_variant, false))
return false;
+
if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
{
int count;
- int ag_mode;
+ enum machine_mode ag_mode;
int i;
rtx par;
int shift;
- aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
if (!TARGET_NEON)
{
{
if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
{
- warning (OPT_Wattributes, "%qE attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
*no_add_attrs = true;
}
return NULL_TREE;
else
address = reg;
- if (TARGET_ARM)
- emit_insn (gen_pic_load_addr_arm (address, orig));
- else if (TARGET_THUMB2)
- emit_insn (gen_pic_load_addr_thumb2 (address, orig));
+ if (TARGET_32BIT)
+ emit_insn (gen_pic_load_addr_32bit (address, orig));
else /* TARGET_THUMB1 */
emit_insn (gen_pic_load_addr_thumb1 (address, orig));
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
UNSPEC_GOTSYM_OFF);
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
- if (TARGET_ARM)
- {
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
- }
- else if (TARGET_THUMB2)
+ if (TARGET_32BIT)
{
- /* Thumb-2 only allows very limited access to the PC. Calculate the
- address in a temporary register. */
- if (arm_pic_register != INVALID_REGNUM)
- {
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
- }
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+ if (TARGET_ARM)
+ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
else
- {
- gcc_assert (can_create_pseudo_p ());
- pic_tmp = gen_reg_rtx (Pmode);
- }
-
- emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
- emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
}
else /* TARGET_THUMB1 */
{
if (TARGET_ARM)
emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
else if (TARGET_THUMB2)
- {
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- }
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
else /* TARGET_THUMB1 */
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
if (TARGET_ARM)
emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
else if (TARGET_THUMB2)
- {
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- emit_move_insn (reg, gen_const_mem (SImode, reg));
- }
+ emit_insn (gen_tls_load_dot_plus_four (reg, reg, labelno));
else
{
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
else if ((outer == PLUS || outer == COMPARE)
&& INTVAL (x) < 256 && INTVAL (x) > -256)
return 0;
- else if (outer == AND
+ else if ((outer == IOR || outer == XOR || outer == AND)
&& INTVAL (x) < 256 && INTVAL (x) >= -256)
return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
else if (outer == ASHIFT || outer == ASHIFTRT
|| outer == LSHIFTRT)
return 0;
case UMOD:
if (TARGET_HARD_FLOAT && mode == SFmode)
*total = COSTS_N_INSNS (2);
- else if (TARGET_HARD_FLOAT && mode == DFmode)
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
*total = COSTS_N_INSNS (4);
else
*total = COSTS_N_INSNS (20);
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
case NEG:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
return false;
case ABS:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
return false;
return true;
case CONST_DOUBLE:
- if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (4);
return false;
case MINUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case PLUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case NEG:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case ABS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
return false;
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
*total = COSTS_N_INSNS (1);
return false;
}
}
-/* Initialize a vector with non-constant elements. FIXME: We can do better
- than the current implementation (building a vector on the stack and then
- loading it) in many cases. See rs6000.c. */
+/* If VALS is a vector constant that can be loaded into a register
+ using VDUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX.
+
+ VALS qualifies only when it is a CONST_VECTOR whose element mode is
+ at most 4 bytes wide and whose elements are all rtx_equal_p to
+ element 0. In that case element 0 is copied into a register of the
+ element mode, and the value returned is an UNSPEC_VDUP_N of that
+ register in the mode of VALS. */
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+ return NULL_RTX;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ /* The elements are not all the same. We could handle repeating
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using
+ vdup.i16). */
+ return NULL_RTX;
+
+ /* We can load this constant by using VDUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that can not be converted into a CONST_VECTOR.
+
+ A PARALLEL is converted to a CONST_VECTOR only when every element
+ is a CONST_INT or CONST_DOUBLE. The candidates are then tried in
+ this order: the constant vector itself when
+ neon_immediate_valid_for_move accepts it, the result of
+ neon_vdup_constant (VALS), and finally the constant vector itself
+ again, to be loaded from the constant pool. Any other rtx code
+ for VALS is treated as unreachable. */
+
+rtx
+neon_make_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ rtx target;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
+
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
+ {
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ }
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */
+ return const_vec;
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
+ pipeline cycle; creating the constant takes one or two ARM
+ pipeline cycles. */
+ return target;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. On Cortex-A8 this takes two cycles
+ (for either double or quad vectors). We can not take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing
+ mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We can not construct an initializer. */
+ return NULL_RTX;
+}
+
+/* Initialize vector TARGET to VALS. */
void
neon_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner = GET_MODE_INNER (mode);
- unsigned int i, n_elts = GET_MODE_NUNITS (mode);
- rtx mem;
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
- gcc_assert (VECTOR_MODE_P (mode));
+ if (n_var == 0)
+ {
+ rtx constant = neon_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtvec ops;
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ neon_expand_vector_init (target, copy);
+
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ ops = gen_rtvec (3, x, target, GEN_INT (one_var));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
+ return;
+ }
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
- XVECEXP (vals, 0, i));
-
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
}
{
if (mode == HFmode)
{
+ if (!TARGET_NEON_FP16)
+ return GENERAL_REGS;
if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
return NO_REGS;
return GENERAL_REGS;
/* A compare with a shifted operand. Because of canonicalization, the
comparison will have to be swapped when we emit the assembler. */
- if (GET_MODE (y) == SImode && GET_CODE (y) == REG
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|| GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
|| GET_CODE (x) == ROTATERT))
/* This operation is performed swapped, but since we only rely on the Z
flag we don't need an additional mode. */
- if (GET_MODE (y) == SImode && REG_P (y)
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& GET_CODE (x) == NEG
&& (op == EQ || op == NE))
return CC_Zmode;
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ - (count * 8)))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
return "";
}
-/* Output a 'call' insn that is a reference in memory. */
+/* Output a 'call' insn that is a reference in memory. This is
+ disabled for ARMv5 and we prefer a blx instead because otherwise
+ there's a significant performance overhead. */
const char *
output_call_mem (rtx *operands)
{
- if (TARGET_INTERWORK && !arm_arch5)
+ gcc_assert (!arm_arch5);
+ if (TARGET_INTERWORK)
{
output_asm_insn ("ldr%?\t%|ip, %0", operands);
output_asm_insn ("mov%?\t%|lr, %|pc", operands);
first instruction. It's safe to use IP as the target of the
load since the call will kill it anyway. */
output_asm_insn ("ldr%?\t%|ip, %0", operands);
- if (arm_arch5)
- output_asm_insn ("blx%?\t%|ip", operands);
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ if (arm_arch4t)
+ output_asm_insn ("bx%?\t%|ip", operands);
else
- {
- output_asm_insn ("mov%?\t%|lr, %|pc", operands);
- if (arm_arch4t)
- output_asm_insn ("bx%?\t%|ip", operands);
- else
- output_asm_insn ("mov%?\t%|pc, %|ip", operands);
- }
+ output_asm_insn ("mov%?\t%|pc, %|ip", operands);
}
else
{
return "";
}
-
-/* Emit a MOVW/MOVT pair. */
-void arm_emit_movpair (rtx dest, rtx src)
-{
- emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
- emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
-}
-
+void
+arm_emit_movpair (rtx dest, rtx src)
+ {
+ /* If the src is an immediate, simplify it. */
+ if (CONST_INT_P (src))
+ {
+ HOST_WIDE_INT val = INTVAL (src);
+ emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
+ if ((val >> 16) & 0x0000ffff)
+ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
+ GEN_INT (16)),
+ GEN_INT ((val >> 16) & 0x0000ffff));
+ return;
+ }
+ emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
+ emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
+ }
/* Output a move from arm registers to an fpa registers.
OPERANDS[0] is an fpa register.
{
/* We're only using DImode here because it's a convenient size. */
ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
- ops[1] = adjust_address (mem, SImode, 8 * i);
+ ops[1] = adjust_address (mem, DImode, 8 * i);
if (reg_overlap_mentioned_p (ops[0], mem))
{
gcc_assert (overlap == -1);
&& crtl->uses_pic_offset_table)
save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
}
+ else if (IS_VOLATILE(func_type))
+ {
+ /* For noreturn functions we historically omitted register saves
+ altogether. However this really messes up debugging. As a
+ compromise save just the frame pointers. Combined with the link
+ register saved elsewhere this should be sufficient to get
+ a backtrace. */
+ if (frame_pointer_needed)
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
+ }
else
{
/* In the normal case we only need to save those registers
| (1 << LR_REGNUM)
| (1 << PC_REGNUM);
- /* Volatile functions do not return, so there
- is no need to save any other registers. */
- if (IS_VOLATILE (func_type))
- return save_reg_mask;
-
save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
/* Decide if we need to save the link register.
gcc_assert (stack_adjust == 0 || stack_adjust == 4);
if (stack_adjust && arm_arch5 && TARGET_ARM)
- sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
else
{
/* If we can't use ldmib (SA110 bug),
then try to pop r3 instead. */
if (stack_adjust)
live_regs_mask |= 1 << 3;
- sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
+
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
}
}
else
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "pop%s\t{", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
p = instr + strlen (instr);
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
int vfp_offset = offsets->frame;
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
}
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
if (TARGET_HARD_FLOAT && TARGET_VFP)
{
- start_reg = FIRST_VFP_REGNUM;
- for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
+ int end_reg = LAST_VFP_REGNUM + 1;
+
+ /* Scan the registers in reverse order. We need to match
+ any groupings made in the prologue and generate matching
+ pop operations. */
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
{
if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
- && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
+ && (!df_regs_ever_live_p (reg + 1)
+ || call_used_regs[reg + 1]))
{
- if (start_reg != reg)
+ if (end_reg > reg + 2)
vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
- start_reg = reg + 2;
+ (reg + 2 - FIRST_VFP_REGNUM) / 2,
+ (end_reg - (reg + 2)) / 2);
+ end_reg = reg;
}
}
- if (start_reg != reg)
- vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
+ if (end_reg > reg + 2)
+ vfp_output_fldmd (f, SP_REGNUM, 0,
+ (end_reg - (reg + 2)) / 2);
}
+
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
/* For the body of the insn we are going to generate an UNSPEC in
parallel with several USEs. This allows the insn to be recognized
- by the push_multi pattern in the arm.md file. The insn looks
- something like this:
+ by the push_multi pattern in the arm.md file.
+
+ The body of the insn looks something like this:
(parallel [
- (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
+ (set (mem:BLK (pre_modify:SI (reg:SI sp)
+ (const_int:SI <num>)))
(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
- (use (reg:SI 11 fp))
- (use (reg:SI 12 ip))
- (use (reg:SI 14 lr))
- (use (reg:SI 15 pc))
+ (use (reg:SI XX))
+ (use (reg:SI YY))
+ ...
])
For the frame note however, we try to be more explicit and actually
(sequence [
(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
(set (mem:SI (reg:SI sp)) (reg:SI r4))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
+ ...
])
- This sequence is used both by the code to support stack unwinding for
- exceptions handlers and the code to generate dwarf2 frame debugging. */
+ FIXME: In an ideal world the PRE_MODIFY would not exist and
+ instead we'd have a parallel expression detailing all
+ the stores to the various memory addresses so that debug
+ information is more up-to-date. Remember, however, while writing
+ this to take care of the constraints with the push instruction.
+
+ Note also that this has to be taken care of for the VFP registers.
+
+ For more see PR43399. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -4 * num_regs))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
{
tmp
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (SImode,
- plus_constant (stack_pointer_rtx,
- 4 * j)),
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx,
+ 4 * j)),
reg);
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -12 * count))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
}
}
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All eliminations are permissible. Note that ARG_POINTER_REGNUM and
+ HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
+ pointer, we must eliminate FRAME_POINTER_REGNUM into
+ HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
+ ARG_POINTER_REGNUM. */
+
+bool
+arm_can_eliminate (const int from, const int to)
+{
+ return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
+ (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
+ (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
+ (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
+ true);
+}
/* Emit RTL to save coprocessor registers on function entry. Returns the
number of bytes pushed. */
/* Save any floating point call-saved registers used by this
function. */
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
}
return;
+ /* Print the high single-precision register of a VFP double-precision
+ register. */
+ case 'p':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
+ }
+ return;
+
/* Print a VFP/Neon double precision or quad precision register name. */
case 'P':
case 'q':
}
return;
+ /* Translate an S register number into a D register number and element index. */
+ case 'y':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+ }
+ return;
+
/* Register specifier for vld1.16/vst1.16. Translate the S register
number into a D register number and element index. */
case 'z':
return VFP_REGNO_OK_FOR_DOUBLE (regno);
/* VFP registers can hold HFmode values, but there is no point in
- putting them there unless we have the NEON extensions for
- loading/storing them, too. */
+ putting them there unless we have hardware conversion insns. */
if (mode == HFmode)
- return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
if (TARGET_NEON)
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
}
else
{
- int set_float_abi_attributes = 0;
- switch (arm_fpu_arch)
- {
- case FPUTYPE_FPA:
- fpu_name = "fpa";
- break;
- case FPUTYPE_FPA_EMU2:
- fpu_name = "fpe2";
- break;
- case FPUTYPE_FPA_EMU3:
- fpu_name = "fpe3";
- break;
- case FPUTYPE_MAVERICK:
- fpu_name = "maverick";
- break;
- case FPUTYPE_VFP:
- fpu_name = "vfp";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3D16:
- fpu_name = "vfpv3-d16";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3:
- fpu_name = "vfpv3";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON:
- fpu_name = "neon";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON_FP16:
- fpu_name = "neon-fp16";
- set_float_abi_attributes = 1;
- break;
- default:
- abort();
- }
- if (set_float_abi_attributes)
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
if (TARGET_HARD_FLOAT)
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
if (IS_FPA_REGNUM (regno))
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
- /* FIXME: VFPv3 register numbering. */
if (IS_VFP_REGNUM (regno))
- return 64 + regno - FIRST_VFP_REGNUM;
+ {
+ /* See comment in arm_dwarf_register_span. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return 64 + regno - FIRST_VFP_REGNUM;
+ else
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2;
+ }
if (IS_IWMMXT_GR_REGNUM (regno))
return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
gcc_unreachable ();
}
+/* DWARF models VFPv3 registers as 32 64-bit registers.
+ GCC models them as 64 32-bit registers, so we need to describe this to
+ the DWARF generation code. Other registers can use the default. */
+static rtx
+arm_dwarf_register_span (rtx rtl)
+{
+ unsigned regno;
+ int nregs;
+ int i;
+ rtx p;
+
+ regno = REGNO (rtl);
+ if (!IS_VFP_REGNUM (regno))
+ return NULL_RTX;
+
+ /* XXX FIXME: The EABI defines two VFP register ranges:
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
+ 256-287: D0-D31
+ The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
+ corresponding D register. Until GDB supports this, we shall use the
+ legacy encodings. We also use these encodings for D0-D15 for
+ compatibility with older debuggers. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return NULL_RTX;
+
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
+ regno = (regno - FIRST_VFP_REGNUM) / 2;
+ for (i = 0; i < nregs; i++)
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+
+ return p;
+}
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
&& lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
{
static bool warned;
- if (!warned && warn_psabi)
+ if (!warned && warn_psabi && !in_system_header)
{
warned = true;
inform (input_location,
|| (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
+/* Only thumb1 can't support conditional execution, so return true if
+ the target is not thumb1. */
+static bool
+arm_have_conditional_execution (void)
+{
+ return !TARGET_THUMB1;
+}
+
#include "gt-arm.h"