/* Output routines for GCC for ARM.
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
-#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
+#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
+#include "intl.h"
+#include "libfuncs.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;
-const struct attribute_spec arm_attribute_table[];
-
void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations. */
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
+static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
+static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
+static enum machine_mode arm_promote_function_mode (const_tree,
+ enum machine_mode, int *,
+ const_tree, int);
+static bool arm_return_in_memory (const_tree, const_tree);
+static rtx arm_function_value (const_tree, const_tree, bool);
+static rtx arm_libcall_value (enum machine_mode, const_rtx);
+
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
-static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
-static bool arm_size_rtx_costs (rtx, int, int, int *);
-static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
-static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
-static bool arm_xscale_rtx_costs (rtx, int, int, int *);
-static bool arm_9e_rtx_costs (rtx, int, int, int *);
+static bool arm_have_conditional_execution (void);
+static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
+static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
+static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
-static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+ const_tree);
+static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
+static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
+static tree arm_build_builtin_va_list (void);
+static void arm_expand_builtin_va_start (tree, rtx);
+static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
+static const char *arm_invalid_parameter_type (const_tree t);
+static const char *arm_invalid_return_type (const_tree t);
+static tree arm_promoted_type (const_tree t);
+static tree arm_convert_to_type (tree type, tree expr);
+static bool arm_scalar_mode_supported_p (enum machine_mode);
+static bool arm_frame_pointer_required (void);
+static bool arm_can_eliminate (const int, const int);
+static void arm_asm_trampoline_template (FILE *);
+static void arm_trampoline_init (rtx, tree, rtx);
+static rtx arm_trampoline_adjust_address (rtx);
+static rtx arm_pic_static_addr (rtx orig, rtx reg);
+
+\f
+/* Table of machine attributes. */
+static const struct attribute_spec arm_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 26 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL },
+ /* Whereas these functions are always known to reside within the 26 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL },
+ /* Specify the procedure call conventions for a function. */
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
+ /* Interrupt Service Routines have special prologue and epilogue requirements. */
+ { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#ifdef ARM_PE
+ /* ARM/PE has three new attributes:
+ interfacearm - ?
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times.
+ */
+ { "dllimport", 0, 0, true, false, false, NULL },
+ { "dllexport", 0, 0, true, false, false, NULL },
+ { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
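+
+/* Illustrative only: with the "pcs" entry above registered, a single
+   function can request a particular calling convention, e.g.
+
+     double dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));
+
+   arm_handle_pcs_attribute validates the string argument; see
+   pcs_attribute_args further down for the names that are accepted.  */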
\f
/* Initialize the GCC target structure. */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arm_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE arm_libcall_value
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs
-#undef TARGET_PROMOTE_FUNCTION_ARGS
-#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
-#undef TARGET_PROMOTE_FUNCTION_RETURN
-#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
+
#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
+
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#define TARGET_HAVE_TLS true
#endif
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
+
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
+
+#undef TARGET_INVALID_PARAMETER_TYPE
+#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
+
+#undef TARGET_INVALID_RETURN_TYPE
+#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
+
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE arm_promoted_type
+
+#undef TARGET_CONVERT_TO_TYPE
+#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arm_can_eliminate
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Obstack for minipool constant handling. */
/* True if we are currently building a constant table. */
int making_const_table;
-/* Define the information needed to generate branch insns. This is
- stored from the compare operation. */
-rtx arm_compare_op0, arm_compare_op1;
-
/* The processor for which instructions should be scheduled. */
enum processor_type arm_tune = arm_none;
+/* The current tuning set. */
+const struct tune_params *current_tune;
+
/* The default processor used if not overridden by commandline. */
static enum processor_type arm_default_cpu = arm_none;
-/* Which floating point model to use. */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. */
-enum fputype arm_fpu_arch;
-
/* Which floating point hardware to schedule for. */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware. */
enum float_abi_type arm_float_abi;
+/* Which __fp16 format to use. */
+enum arm_fp16_format_type arm_fp16_format;
+
/* Which ABI to use. */
enum arm_abi_type arm_abi;
#define FL_DIV (1 << 18) /* Hardware divide. */
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
#define FL_NEON (1 << 20) /* Neon instructions. */
+#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
+ architecture. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
instructions we are allowed to generate. */
/* Nonzero if instructions not present in the 'M' profile can be used. */
int arm_arch_notm = 0;
+/* Nonzero if instructions present in ARMv7E-M can be used. */
+int arm_arch7em = 0;
+
/* Nonzero if this chip can benefit from load scheduling. */
int arm_ld_sched = 0;
/* The register number to be used for the PIC offset register. */
unsigned arm_pic_register = INVALID_REGNUM;
-/* Set to 1 when a return insn is output, this means that the epilogue
- is not needed. */
-int return_used_this_function;
-
/* Set to 1 after arm_reorg has started. Reset to start at the start of
the next function. */
static int after_arm_reorg = 0;
-/* The maximum number of insns to be used when loading a constant. */
-static int arm_constant_limit = 3;
+static enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below. */
int arm_ccfsm_state;
enum processor_type core;
const char *arch;
const unsigned long flags;
- bool (* rtx_costs) (rtx, int, int, int *);
+ const struct tune_params *const tune;
+};
+
+const struct tune_params arm_slowmul_tune =
+{
+ arm_slowmul_rtx_costs,
+ 3
+};
+
+const struct tune_params arm_fastmul_tune =
+{
+ arm_fastmul_rtx_costs,
+ 1
+};
+
+const struct tune_params arm_xscale_tune =
+{
+ arm_xscale_rtx_costs,
+ 2
+};
+
+const struct tune_params arm_9e_tune =
+{
+ arm_9e_rtx_costs,
+ 1
};
/* Not all of these give usefully different compilation alternatives,
{
/* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
- {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
+ {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
{NULL, arm_none, NULL, 0, NULL}
static const struct processors all_architectures[] =
{
/* ARM Architectures */
- /* We don't specify rtx_costs here as it will be figured out
+ /* We don't specify tuning costs here as it will be figured out
from the core. */
{"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
{"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
+ {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
{"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
{"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
+ {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
{NULL, arm_none, NULL, 0 , NULL}
};
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
-struct fpu_desc
-{
- const char * name;
- enum fputype fpu;
-};
-
-
/* Available values for -mfpu=. */
-static const struct fpu_desc all_fpus[] =
-{
- {"fpa", FPUTYPE_FPA},
- {"fpe2", FPUTYPE_FPA_EMU2},
- {"fpe3", FPUTYPE_FPA_EMU2},
- {"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP},
- {"vfp3", FPUTYPE_VFP3},
- {"vfpv3", FPUTYPE_VFP3},
- {"vfpv3-d16", FPUTYPE_VFP3D16},
- {"neon", FPUTYPE_NEON}
-};
-
-
-/* Floating point models used by the different hardware.
- See fputype in arm.h. */
-
-static const enum fputype fp_model_for_fpu[] =
-{
- /* No FP hardware. */
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
};
+struct fp16_format
+{
+ const char *name;
+ enum arm_fp16_format_type fp16_format_type;
+};
+
+
+/* Available values for -mfp16-format=. */
+
+static const struct fp16_format all_fp16_formats[] =
+{
+ {"none", ARM_FP16_FORMAT_NONE},
+ {"ieee", ARM_FP16_FORMAT_IEEE},
+ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
+};
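+
+/* Usage sketch: -mfp16-format=ieee selects IEEE 754-2008 binary16 as
+   the representation of __fp16, -mfp16-format=alternative selects the
+   ARM alternative format (no infinities or NaNs, in exchange for a
+   larger finite range), and with the default of "none" the __fp16
+   type is rejected altogether.  */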
+
+
struct abi_name
{
const char *name;
TLS_LE32
};
+/* The maximum number of insns to be used when loading a constant. */
+inline static int
+arm_constant_limit (bool size_p)
+{
+ return size_p ? 1 : current_tune->constant_limit;
+}
+
/* Emit an insn that's a simple single-set. Both the operands must be known
to be valid. */
inline static rtx
set_optab_libfunc (umod_optab, DImode, NULL);
set_optab_libfunc (smod_optab, SImode, NULL);
set_optab_libfunc (umod_optab, SImode, NULL);
+
+ /* Half-precision float operations. The compiler handles all
+ operations with NULL libfuncs by converting to SFmode. */
+ switch (arm_fp16_format)
+ {
+ case ARM_FP16_FORMAT_IEEE:
+ case ARM_FP16_FORMAT_ALTERNATIVE:
+
+ /* Conversions. */
+ set_conv_libfunc (trunc_optab, HFmode, SFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_f2h_ieee"
+ : "__gnu_f2h_alternative"));
+ set_conv_libfunc (sext_optab, SFmode, HFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_h2f_ieee"
+ : "__gnu_h2f_alternative"));
+
+ /* Arithmetic. */
+ set_optab_libfunc (add_optab, HFmode, NULL);
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
+ set_optab_libfunc (smul_optab, HFmode, NULL);
+ set_optab_libfunc (neg_optab, HFmode, NULL);
+ set_optab_libfunc (sub_optab, HFmode, NULL);
+
+ /* Comparisons. */
+ set_optab_libfunc (eq_optab, HFmode, NULL);
+ set_optab_libfunc (ne_optab, HFmode, NULL);
+ set_optab_libfunc (lt_optab, HFmode, NULL);
+ set_optab_libfunc (le_optab, HFmode, NULL);
+ set_optab_libfunc (ge_optab, HFmode, NULL);
+ set_optab_libfunc (gt_optab, HFmode, NULL);
+ set_optab_libfunc (unord_optab, HFmode, NULL);
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_AAPCS_BASED)
+ synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+}
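+
+/* For illustration: because the HFmode optabs above are registered as
+   NULL, a source expression such as
+
+     __fp16 a, b, c;
+     c = a + b;
+
+   is evaluated by widening through the conversion libcalls, roughly
+   __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b)) in the
+   IEEE case; the alternative format uses the _alternative entry
+   points.  */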
+
+/* On AAPCS systems, this is the "struct __va_list". */
+static GTY(()) tree va_list_type;
+
+/* Return the type to use as __builtin_va_list. */
+static tree
+arm_build_builtin_va_list (void)
+{
+ tree va_list_name;
+ tree ap_field;
+
+ if (!TARGET_AAPCS_BASED)
+ return std_build_builtin_va_list ();
+
+ /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
+ defined as:
+
+ struct __va_list
+ {
+ void *__ap;
+ };
+
+ The C Library ABI further reinforces this definition in \S
+ 4.1.
+
+ We must follow this definition exactly. The structure tag
+ name is visible in C++ mangled names, and thus forms a part
+ of the ABI. The field name may be used by people who
+ #include <stdarg.h>. */
+ /* Create the type. */
+ va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
+ /* Give it the required name. */
+ va_list_name = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL,
+ get_identifier ("__va_list"),
+ va_list_type);
+ DECL_ARTIFICIAL (va_list_name) = 1;
+ TYPE_NAME (va_list_type) = va_list_name;
+ /* Create the __ap field. */
+ ap_field = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__ap"),
+ ptr_type_node);
+ DECL_ARTIFICIAL (ap_field) = 1;
+ DECL_FIELD_CONTEXT (ap_field) = va_list_type;
+ TYPE_FIELDS (va_list_type) = ap_field;
+ /* Compute its layout. */
+ layout_type (va_list_type);
+
+ return va_list_type;
+}
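+
+/* One consequence of the tag name (a sketch, not enforced here): in
+   C++ a declaration such as
+
+     void log_args (va_list ap);
+
+   mangles the parameter as the class type __va_list (St9__va_list
+   under the ARM C++ ABI) rather than as a plain pointer, which is why
+   the layout above must never change.  */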
+
+/* Return an expression of type "void *" pointing to the next
+ available argument in a variable-argument list. VALIST is the
+ user-level va_list object, of type __builtin_va_list. */
+static tree
+arm_extract_valist_ptr (tree valist)
+{
+ if (TREE_TYPE (valist) == error_mark_node)
+ return error_mark_node;
+
+ /* On an AAPCS target, the pointer is stored within "struct
+ va_list". */
+ if (TARGET_AAPCS_BASED)
+ {
+ tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
+ valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
+ valist, ap_field, NULL_TREE);
+ }
+
+ return valist;
+}
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
+static void
+arm_expand_builtin_va_start (tree valist, rtx nextarg)
+{
+ valist = arm_extract_valist_ptr (valist);
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
+static tree
+arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ valist = arm_extract_valist_ptr (valist);
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Implement TARGET_HANDLE_OPTION. */
const struct processors * sel;
unsigned int sought;
- selected_cpu = TARGET_CPU_DEFAULT;
+ selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
if (selected_cpu == arm_none)
{
#ifdef SUBTARGET_CPU_DEFAULT
/* Use the subtarget default CPU if none was specified by
configure. */
- selected_cpu = SUBTARGET_CPU_DEFAULT;
+ selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
/* Default to ARM6. */
if (selected_cpu == arm_none)
gcc_assert (arm_tune != arm_none);
tune_flags = all_cores[(int)arm_tune].flags;
+ current_tune = all_cores[(int)arm_tune].tune;
+
+ if (target_fp16_format_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
+ {
+ if (streq (all_fp16_formats[i].name, target_fp16_format_name))
+ {
+ arm_fp16_format = all_fp16_formats[i].fp16_format_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (all_fp16_formats))
+ error ("invalid __fp16 format option: -mfp16-format=%s",
+ target_fp16_format_name);
+ }
+ else
+ arm_fp16_format = ARM_FP16_FORMAT_NONE;
if (target_abi_name)
{
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
error ("iwmmxt abi requires an iwmmxt capable cpu");
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
if (target_fpu_name == NULL && target_fpe_name != NULL)
{
if (streq (target_fpe_name, "2"))
error ("invalid floating point emulation option: -mfpe=%s",
target_fpe_name);
}
- if (target_fpu_name != NULL)
- {
- /* The user specified a FPU. */
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
- {
- if (streq (all_fpus[i].name, target_fpu_name))
- {
- arm_fpu_arch = all_fpus[i].fpu;
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- break;
- }
- }
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
- }
- else
+
+ if (target_fpu_name == NULL)
{
#ifdef FPUTYPE_DEFAULT
- /* Use the default if it is specified for this platform. */
- arm_fpu_arch = FPUTYPE_DEFAULT;
- arm_fpu_tune = FPUTYPE_DEFAULT;
+ target_fpu_name = FPUTYPE_DEFAULT;
#else
- /* Pick one based on CPU type. */
- /* ??? Some targets assume FPA is the default.
- if ((insn_flags & FL_VFP) != 0)
- arm_fpu_arch = FPUTYPE_VFP;
- else
- */
if (arm_arch_cirrus)
- arm_fpu_arch = FPUTYPE_MAVERICK;
+ target_fpu_name = "maverick";
else
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
+ target_fpu_name = "fpe2";
#endif
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
- arm_fpu_tune = FPUTYPE_FPA;
+ }
+
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+ {
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+
+ if (!arm_fpu_desc)
+ {
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+ return;
+ }
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
else
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable ();
}
if (target_float_abi_name != NULL)
else
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
- sorry ("-mfloat-abi=hard and VFP");
+ if (TARGET_AAPCS_BASED
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
+ error ("FPA is unsupported in the AAPCS");
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (TARGET_CALLER_INTERWORKING)
+ error ("AAPCS does not support -mcaller-super-interworking");
+ else if (TARGET_CALLEE_INTERWORKING)
+ error ("AAPCS does not support -mcallee-super-interworking");
+ }
/* FPA and iWMMXt are incompatible because the insn encodings overlap.
VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
if (TARGET_THUMB2 && TARGET_IWMMXT)
sorry ("Thumb-2 iWMMXt");
+ /* __fp16 support currently assumes the core has ldrh. */
+ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
+ sorry ("__fp16 and no ldrh");
+
/* If soft-float is specified then don't use FPU. */
if (TARGET_SOFT_FLOAT)
- arm_fpu_arch = FPUTYPE_NONE;
+ arm_fpu_attr = FPU_NONE;
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (arm_abi == ARM_ABI_IWMMXT)
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD
+ && TARGET_HARD_FLOAT
+ && TARGET_VFP)
+ arm_pcs_default = ARM_PCS_AAPCS_VFP;
+ else
+ arm_pcs_default = ARM_PCS_AAPCS;
+ }
+ else
+ {
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
+ sorry ("-mfloat-abi=hard and VFP");
+
+ if (arm_abi == ARM_ABI_APCS)
+ arm_pcs_default = ARM_PCS_APCS;
+ else
+ arm_pcs_default = ARM_PCS_ATPCS;
+ }
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+ || (TARGET_FPA && arm_fpu_desc->rev))
&& (tune_flags & FL_MODE32) == 0)
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
/* Use the cp15 method if it is available. */
if (target_thread_pointer == TP_AUTO)
{
- if (arm_arch6k && !TARGET_THUMB)
+ if (arm_arch6k && !TARGET_THUMB1)
target_thread_pointer = TP_CP15;
else
target_thread_pointer = TP_SOFT;
fix_cm3_ldrd = 0;
}
- /* ??? We might want scheduling for thumb2. */
- if (TARGET_THUMB && flag_schedule_insns)
+ if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
flag_schedule_insns = 0;
if (optimize_size)
{
- arm_constant_limit = 1;
-
/* If optimizing for size, bump the number of instructions that we
are prepared to conditionally execute (even on a StrongARM). */
max_insns_skipped = 6;
}
else
{
- /* For processors with load scheduling, it never costs more than
- 2 cycles to load a constant, and the load scheduler may well
- reduce that to 1. */
- if (arm_ld_sched)
- arm_constant_limit = 1;
-
- /* On XScale the longer latency of a load makes it more difficult
- to achieve a good schedule, so it's faster to synthesize
- constants that can be done in two insns. */
- if (arm_tune_xscale)
- arm_constant_limit = 2;
-
/* StrongARM has early execution of branches, so a sequence
that is worth skipping is shorter. */
if (arm_tune_strongarm)
max_insns_skipped = 3;
}
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
}
\f
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ On the ARM, (if r8 is the static chain regnum, and remembering that
+ referencing pc adds an offset of 8) the trampoline looks like:
+ ldr r8, [pc, #0]
+ ldr pc, [pc]
+ .word static chain value
+ .word function's address
+ XXX FIXME: When the trampoline returns, r8 will be clobbered. */
+
+static void
+arm_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_ARM)
+ {
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* The Thumb-2 trampoline is similar to the ARM implementation.
+ Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
+ STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else
+ {
+ ASM_OUTPUT_ALIGN (f, 2);
+ fprintf (f, "\t.code\t16\n");
+ fprintf (f, ".Ltrampoline_start:\n");
+ asm_fprintf (f, "\tpush\t{r0, r1}\n");
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
+ asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
+ }
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline. */
+
+static void
+arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr, mem, a_tramp;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ emit_move_insn (mem, fnaddr);
+
+ a_tramp = XEXP (m_tramp, 0);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+ LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
+ plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
+}
+
+/* Thumb trampolines should be entered in thumb mode, so set
+ the bottom bit of the address. */
+
+static rtx
+arm_trampoline_adjust_address (rtx addr)
+{
+ if (TARGET_THUMB)
+ addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
+ NULL, 0, OPTAB_LIB_WIDEN);
+ return addr;
+}
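+
+/* Illustrative only: the trampoline machinery is exercised by GNU C
+   nested functions whose address escapes, e.g.
+
+     int outer (int x)
+     {
+       int inner (int y) { return x + y; }
+       return apply (inner);   ("apply" being a hypothetical callee)
+     }
+
+   The stub written onto the stack loads the static chain register
+   with outer's frame and jumps to inner; on Thumb targets
+   arm_trampoline_adjust_address sets the low bit of the entry point
+   so the stub is entered in Thumb state.  */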
+\f
/* Return 1 if it is possible to return using a single instruction.
If SIBLING is non-null, this is a test for a return before a sibling
call. SIBLING is the call insn, so we can examine its register usage. */
switch (code)
{
case PLUS:
+ case COMPARE:
+ case EQ:
+ case NE:
+ case GT:
+ case LE:
+ case LT:
+ case GE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
case MINUS: /* Should only occur with (MINUS I reg) => rsb */
case XOR:
+ return 0;
+
case IOR:
+ if (TARGET_THUMB2)
+ return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
return 0;
case AND:
&& !cond
&& (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1, 0)
- > arm_constant_limit + (code != SET)))
+ > (arm_constant_limit (optimize_function_for_size_p (cfun))
+ + (code != SET))))
{
if (code == SET)
{
1);
}
-/* Return the number of ARM instructions required to synthesize the given
- constant. */
+/* Return the number of instructions required to synthesize the given
+ constant, if we start emitting them from bit-position I. */
static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
HOST_WIDE_INT temp1;
+ int step_size = TARGET_ARM ? 2 : 1;
int num_insns = 0;
+
+ gcc_assert (TARGET_ARM || i == 0);
+
do
{
int end;
if (i <= 0)
i += 32;
- if (remainder & (3 << (i - 2)))
+ if (remainder & (((1 << step_size) - 1) << (i - step_size)))
{
end = i - 8;
if (end < 0)
| ((i < end) ? (0xff >> (32 - end)) : 0));
remainder &= ~temp1;
num_insns++;
- i -= 6;
+ i -= 8 - step_size;
}
- i -= 2;
+ i -= step_size;
} while (remainder);
return num_insns;
}
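+
+/* A worked example of the above (ARM mode, so step_size == 2): for
+   the constant 0x00ff00ff each 8-bit chunk fits an immediate with an
+   even rotation, so the count is 2, corresponding to
+
+	mov	rD, #0x000000ff
+	orr	rD, rD, #0x00ff0000
+
+   while 0x000000ff would count only the single mov.  */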
-/* Emit an instruction with the indicated PATTERN. If COND is
- non-NULL, conditionalize the execution of the instruction on COND
- being true. */
-
-static void
-emit_constant_insn (rtx cond, rtx pattern)
+static int
+find_best_start (unsigned HOST_WIDE_INT remainder)
{
- if (cond)
- pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
- emit_insn (pattern);
-}
+ int best_consecutive_zeros = 0;
+ int i;
+ int best_start = 0;
-/* As above, but extra parameter GENERATE which, if clear, suppresses
+ /* If we aren't targeting ARM, the best place to start is always at
+ the bottom. */
+ if (! TARGET_ARM)
+ return 0;
+
+ for (i = 0; i < 32; i += 2)
+ {
+ int consecutive_zeros = 0;
+
+ if (!(remainder & (3 << i)))
+ {
+ while ((i < 32) && !(remainder & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
+ }
+
+ /* So long as it won't require any more insns to do so, it's
+ desirable to emit a small constant (in bits 0...9) in the last
+ insn. This way there is more chance that it can be combined with
+ a later addressing insn to form a pre-indexed load or store
+ operation. Consider:
+
+ *((volatile int *)0xe0000100) = 1;
+ *((volatile int *)0xe0000110) = 2;
+
+ We want this to wind up as:
+
+ mov rA, #0xe0000000
+ mov rB, #1
+ str rB, [rA, #0x100]
+ mov rB, #2
+ str rB, [rA, #0x110]
+
+ rather than having to synthesize both large constants from scratch.
+
+ Therefore, we calculate how many insns would be required to emit
+ the constant starting from `best_start', and also starting from
+ zero (i.e. with bit 31 first to be output). If `best_start' doesn't
+ yield a shorter sequence, we may as well use zero. */
+ if (best_start != 0
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+ && (count_insns_for_constant (remainder, 0) <=
+ count_insns_for_constant (remainder, best_start)))
+ best_start = 0;
+
+ return best_start;
+}
+
+/* Emit an instruction with the indicated PATTERN. If COND is
+ non-NULL, conditionalize the execution of the instruction on COND
+ being true. */
+
+static void
+emit_constant_insn (rtx cond, rtx pattern)
+{
+ if (cond)
+ pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
+ emit_insn (pattern);
+}
+
+/* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */
/* ??? This needs more work for thumb2. */
{
int can_invert = 0;
int can_negate = 0;
+ int final_invert = 0;
int can_negate_initial = 0;
- int can_shift = 0;
int i;
int num_bits_set = 0;
int set_sign_bit_copies = 0;
int insns = 0;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+ int step_size = TARGET_ARM ? 2 : 1;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
{
case SET:
can_invert = 1;
- can_shift = 1;
can_negate = 1;
break;
GEN_INT (ARM_SIGN_EXTEND (val))));
return 1;
}
+
if (remainder == 0)
{
if (reload_completed && rtx_equal_p (target, source))
return 0;
+
if (generate)
emit_constant_insn (cond,
gen_rtx_SET (VOIDmode, target, source));
return 1;
}
+
+ if (TARGET_THUMB2)
+ can_invert = 1;
break;
case AND:
return 1;
}
- /* We don't know how to handle other cases yet. */
- gcc_assert (remainder == 0xffffffff);
-
- if (generate)
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, target,
- gen_rtx_NOT (mode, source)));
- return 1;
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ return 1;
+ }
+ break;
case MINUS:
/* We treat MINUS as (val - source), since (source - val) is always
/* Calculate a few attributes that may be useful for specific
optimizations. */
+ /* Count the number of leading ones. */
for (i = 31; i >= 0; i--)
{
if ((remainder & (1 << i)) == 0)
break;
}
+ /* Count the number of leading zeros. */
for (i = 31; i >= 0; i--)
{
if ((remainder & (1 << i)) != 0)
break;
}
+ /* Count the number of trailing ones. */
for (i = 0; i <= 31; i++)
{
if ((remainder & (1 << i)) == 0)
break;
}
+ /* Count the number of trailing zeros. */
for (i = 0; i <= 31; i++)
{
if ((remainder & (1 << i)) != 0)
if (code == XOR)
break;
+ /* Convert
+ x = y | constant (which consists of set_sign_bit_copies leading 1s
+ followed by 0s, e.g. 0xfff00000)
+ into
+ x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
+
+ This can be done in 2 instructions by using shifts with mov or mvn.
+ E.g. for
+ x = x | 0xfff00000;
+ we generate:
+ mvn r0, r0, asl #12
+ mvn r0, r0, lsr #12 */
if (set_sign_bit_copies > 8
&& (val & (-1 << (32 - set_sign_bit_copies))) == val)
{
return 2;
}
+ /* Convert
+ x = y | constant (which has set_zero_bit_copies trailing 1s)
+ into
+ x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
+
+ E.g. for r0 = r0 | 0xfff we generate:
+ mvn r0, r0, lsr #12
+ mvn r0, r0, asl #12 */
if (set_zero_bit_copies > 8
&& (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
{
return 2;
}
+ /* This will never be reached for Thumb2 because orn is a valid
+ instruction there. This is for Thumb1 and the 32-bit ARM case:
+
+ x = y | constant (such that ~constant is a valid constant)
+ is transformed into
+ x = ~(~y & ~constant). */
if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
{
if (generate)
if (remainder & (1 << i))
num_bits_set++;
- if (code == AND || (can_invert && num_bits_set > 16))
- remainder = (~remainder) & 0xffffffff;
+ if ((code == AND)
+ || (code != IOR && can_invert && num_bits_set > 16))
+ remainder ^= 0xffffffff;
else if (code == PLUS && num_bits_set > 16)
remainder = (-remainder) & 0xffffffff;
+
+ /* For XOR, if more than half the bits are set and there's a sequence
+ of more than 8 consecutive ones in the pattern then we can XOR by the
+ inverted constant and then invert the final result; this may save an
+ instruction and might also lead to the final mvn being merged with
+ some other operation. */
+ else if (code == XOR && num_bits_set > 16
+ && (count_insns_for_constant (remainder ^ 0xffffffff,
+ find_best_start
+ (remainder ^ 0xffffffff))
+ < count_insns_for_constant (remainder,
+ find_best_start (remainder))))
+ {
+ remainder ^= 0xffffffff;
+ final_invert = 1;
+ }
else
{
can_invert = 0;
/* ??? Use thumb2 replicated constants when the high and low halfwords are
the same. */
{
- int best_start = 0;
- if (!TARGET_THUMB2)
- {
- int best_consecutive_zeros = 0;
-
- for (i = 0; i < 32; i += 2)
- {
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
- {
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
- }
- }
-
- /* So long as it won't require any more insns to do so, it's
- desirable to emit a small constant (in bits 0...9) in the last
- insn. This way there is more chance that it can be combined with
- a later addressing insn to form a pre-indexed load or store
- operation. Consider:
-
- *((volatile int *)0xe0000100) = 1;
- *((volatile int *)0xe0000110) = 2;
-
- We want this to wind up as:
-
- mov rA, #0xe0000000
- mov rB, #1
- str rB, [rA, #0x100]
- mov rB, #2
- str rB, [rA, #0x110]
-
- rather than having to synthesize both large constants from scratch.
-
- Therefore, we calculate how many insns would be required to emit
- the constant starting from `best_start', and also starting from
- zero (i.e. with bit 31 first to be output). If `best_start' doesn't
- yield a shorter sequence, we may as well use zero. */
- if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
- }
-
/* Now start emitting the insns. */
- i = best_start;
+ i = find_best_start (remainder);
do
{
int end;
}
else
{
- if (remainder && subtargets)
+ if ((final_invert || remainder) && subtargets)
new_src = gen_reg_rtx (mode);
else
new_src = target;
code = PLUS;
insns++;
- if (TARGET_ARM)
- i -= 6;
- else
- i -= 7;
+ i -= 8 - step_size;
}
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
shifts. */
- if (TARGET_ARM)
- i -= 2;
- else
- i--;
+ i -= step_size;
}
while (remainder);
}
+ if (final_invert)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ insns++;
+ }
+
return insns;
}
/* Define how to find the value returned by a function. */
-rtx
-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
+static rtx
+arm_function_value (const_tree type, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
{
enum machine_mode mode;
int unsignedp ATTRIBUTE_UNUSED;
rtx r ATTRIBUTE_UNUSED;
mode = TYPE_MODE (type);
+
+ if (TARGET_AAPCS_BASED)
+ return aapcs_allocate_return_reg (mode, type, func);
+
/* Promote integer types. */
if (INTEGRAL_TYPE_P (type))
- PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
+ mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
/* Promotes small structs returned in a register to full-word size
for big-endian AAPCS. */
}
}
- return LIBCALL_VALUE(mode);
+ return LIBCALL_VALUE (mode);
+}
+
+static int
+libcall_eq (const void *p1, const void *p2)
+{
+ return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
+}
+
+static hashval_t
+libcall_hash (const void *p1)
+{
+ return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
+}
+
+static void
+add_libcall (htab_t htab, rtx libcall)
+{
+ *htab_find_slot (htab, libcall, INSERT) = libcall;
+}
+
+static bool
+arm_libcall_uses_aapcs_base (const_rtx libcall)
+{
+ static bool init_done = false;
+ static htab_t libcall_htab;
+
+ if (!init_done)
+ {
+ init_done = true;
+
+ libcall_htab = htab_create (31, libcall_hash, libcall_eq,
+ NULL);
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sext_optab, SFmode, HFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, SFmode));
+ }
+
+ return libcall && htab_find (libcall_htab, libcall) != NULL;
+}
+
+static rtx
+arm_libcall_value (enum machine_mode mode, const_rtx libcall)
+{
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ /* The following libcalls return their result in integer registers,
+ even though they return a floating point value. */
+ if (arm_libcall_uses_aapcs_base (libcall))
+ return gen_rtx_REG (mode, ARG_REGISTER (1));
+ }
+
+ return LIBCALL_VALUE (mode);
}
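+
+/* For example (a sketch of the effect): on a hard-float AAPCS target
+   the RTABI helpers still follow the base PCS, so the SFmode result
+   of the sfloat libcall (__aeabi_i2f when the EABI names are in use)
+   comes back in r0, whereas an ordinary function returning float
+   under ARM_PCS_AAPCS_VFP would return it in s0.  */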
/* Determine the amount of memory needed to store the possible return
{
int size = 16;
- if (TARGET_ARM)
+ if (TARGET_32BIT)
{
if (TARGET_HARD_FLOAT_ABI)
{
+ if (TARGET_VFP)
+ size += 32;
if (TARGET_FPA)
size += 12;
if (TARGET_MAVERICK)
return size;
}
-/* Decide whether a type should be returned in memory (true)
- or in a register (false). This is called as the target hook
- TARGET_RETURN_IN_MEMORY. */
+/* Decide whether TYPE should be returned in memory (true)
+ or in a register (false). FNTYPE is the type of the function making
+ the call. */
static bool
-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+arm_return_in_memory (const_tree type, const_tree fntype)
{
HOST_WIDE_INT size;
- size = int_size_in_bytes (type);
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Simple, non-aggregate types (ie not including vectors and
+ complex) are always returned in a register (or registers).
+ We don't care about which register here, so we can short-cut
+ some of the detail. */
+ if (!AGGREGATE_TYPE_P (type)
+ && TREE_CODE (type) != VECTOR_TYPE
+ && TREE_CODE (type) != COMPLEX_TYPE)
+ return false;
+
+ /* Any return value that is no larger than one word can be
+ returned in r0. */
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
+ return false;
+
+ /* Check any available co-processors to see if they accept the
+ type as a register candidate (VFP, for example, can return
+ some aggregates in consecutive registers). These aren't
+ available if the call is variadic. */
+ if (aapcs_select_return_coproc (type, fntype) >= 0)
+ return false;
+
+ /* Vector values should be returned using ARM registers, not
+ memory (unless they're over 16 bytes, which will break since
+ we only have four call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
+ /* The rest go in memory. */
+ return true;
+ }
- /* Vector values should be returned using ARM registers, not memory (unless
- they're over 16 bytes, which will break since we only have four
- call-clobbered registers to play with). */
if (TREE_CODE (type) == VECTOR_TYPE)
return (size < 0 || size > (4 * UNITS_PER_WORD));
if (!AGGREGATE_TYPE_P (type) &&
- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
- /* All simple types are returned in registers.
- For AAPCS, complex types are treated the same as aggregates. */
- return 0;
+ (TREE_CODE (type) != VECTOR_TYPE))
+ /* All simple types are returned in registers. */
+ return false;
if (arm_abi != ARM_ABI_APCS)
{
the aggregate is either huge or of variable size, and in either case
we will want to return it via memory and not in a register. */
if (size < 0 || size > UNITS_PER_WORD)
- return 1;
+ return true;
if (TREE_CODE (type) == RECORD_TYPE)
{
continue;
if (field == NULL)
- return 0; /* An empty structure. Allowed by an extension to ANSI C. */
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */
/* Check that the first field is valid for returning in a register. */
/* ... Floats are not allowed */
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
/* ... Aggregates that are not themselves valid for returning in
a register are not allowed. */
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
/* Now check the remaining fields, if any. Only bitfields are allowed,
since they are not addressable. */
continue;
if (!DECL_BIT_FIELD_TYPE (field))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
if (TREE_CODE (type) == UNION_TYPE)
continue;
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
#endif /* not ARM_WINCE */
/* Return all other types in memory. */
- return 1;
+ return true;
}
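+
+/* Two quick illustrations of the AAPCS branch above:
+
+     struct s1 { int a; };	(4 bytes: returned in r0)
+     struct s2 { int a, b; };	(8 bytes: returned in memory)
+
+   although a homogeneous floating-point aggregate such as
+   struct { float x, y; } may still be claimed for VFP registers by
+   aapcs_select_return_coproc on a suitable target.  */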
/* Indicate whether or not words of a double are in big-endian order. */
return 1;
}
-/* Initialize a variable CUM of type CUMULATIVE_ARGS
- for a call to a function whose data type is FNTYPE.
- For a library call, FNTYPE is NULL. */
-void
-arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
- rtx libname ATTRIBUTE_UNUSED,
- tree fndecl ATTRIBUTE_UNUSED)
+const struct pcs_attribute_arg
{
- /* On the ARM, the offset starts at 0. */
- pcum->nregs = 0;
- pcum->iwmmxt_nregs = 0;
- pcum->can_split = true;
-
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- pcum->named_count = 0;
- pcum->nargs = 0;
+ const char *arg;
+ enum arm_pcs value;
+} pcs_attribute_args[] =
+ {
+ {"aapcs", ARM_PCS_AAPCS},
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
+#if 0
+ /* We could recognize these, but changes would be needed elsewhere
+ to implement them. */
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
+ {"atpcs", ARM_PCS_ATPCS},
+ {"apcs", ARM_PCS_APCS},
+#endif
+ {NULL, ARM_PCS_UNKNOWN}
+ };
- if (TARGET_REALLY_IWMMXT && fntype)
- {
- tree fn_arg;
+static enum arm_pcs
+arm_pcs_from_attribute (tree attr)
+{
+ const struct pcs_attribute_arg *ptr;
+ const char *arg;
- for (fn_arg = TYPE_ARG_TYPES (fntype);
- fn_arg;
- fn_arg = TREE_CHAIN (fn_arg))
- pcum->named_count += 1;
+ /* Get the value of the argument. */
+ if (TREE_VALUE (attr) == NULL_TREE
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
+ return ARM_PCS_UNKNOWN;
- if (! pcum->named_count)
- pcum->named_count = INT_MAX;
- }
-}
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
+ /* Check it against the list of known arguments. */
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
+ if (streq (arg, ptr->arg))
+ return ptr->value;
-/* Return true if mode/type need doubleword alignment. */
-bool
-arm_needs_doubleword_align (enum machine_mode mode, tree type)
-{
- return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
- || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
+ /* An unrecognized PCS name. */
+ return ARM_PCS_UNKNOWN;
}
+/* Get the PCS variant to use for this call. TYPE is the function's type
+ specification, DECL is the specific declaration. DECL may be null if
+ the call could be indirect or if this is a library call. */
+static enum arm_pcs
+arm_get_pcs_model (const_tree type, const_tree decl)
+{
+ bool user_convention = false;
+ enum arm_pcs user_pcs = arm_pcs_default;
+ tree attr;
-/* Determine where to put an argument to a function.
- Value is zero to push the argument on the stack,
- or a hard register in which to store the argument.
-
- MODE is the argument's machine mode.
- TYPE is the data type of the argument (as a tree).
- This is null for libcalls where that information may
- not be available.
- CUM is a variable of type CUMULATIVE_ARGS which gives info about
- the preceding args and about the function being called.
- NAMED is nonzero if this argument is a named parameter
- (otherwise it is an extra parameter matching an ellipsis). */
+ gcc_assert (type);
-rtx
-arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, int named)
-{
- int nregs;
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
+ user_convention = true;
+ }
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (mode)
- && pcum->named_count > pcum->nargs + 1)
+ if (TARGET_AAPCS_BASED)
{
- if (pcum->iwmmxt_nregs <= 9)
- return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
- else
+ /* Detect varargs functions. These always use the base rules
+ (no argument is ever a candidate for a co-processor
+ register). */
+ bool base_rules = (TYPE_ARG_TYPES (type) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
+ != void_type_node));
+
+ if (user_convention)
{
- pcum->can_split = false;
- return NULL_RTX;
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
+ sorry ("Non-AAPCS derived PCS variant");
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
+ error ("Variadic functions must use the base AAPCS variant");
}
- }
-
- /* Put doubleword aligned quantities in even register pairs. */
- if (pcum->nregs & 1
- && ARM_DOUBLEWORD_ALIGN
- && arm_needs_doubleword_align (mode, type))
- pcum->nregs++;
- if (mode == VOIDmode)
- /* Pick an arbitrary value for operand 2 of the call insn. */
- return const0_rtx;
+ if (base_rules)
+ return ARM_PCS_AAPCS;
+ else if (user_convention)
+ return user_pcs;
+ else if (decl && flag_unit_at_a_time)
+ {
+ /* Local functions never leak outside this compilation unit,
+ so we are free to use whatever conventions are
+ appropriate. */
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ return ARM_PCS_AAPCS_LOCAL;
+ }
+ }
+ else if (user_convention && user_pcs != arm_pcs_default)
+ sorry ("PCS variant");
- /* Only allow splitting an arg between regs and memory if all preceding
- args were allocated to regs. For args passed by reference we only count
- the reference pointer. */
- if (pcum->can_split)
- nregs = 1;
- else
- nregs = ARM_NUM_REGS2 (mode, type);
+ /* For everything else we use the target's default. */
+ return arm_pcs_default;
+}
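+
+/* A sketch of the outcome on an AAPCS target (assuming
+   -funit-at-a-time):
+
+     void f (int, ...);	(variadic: always ARM_PCS_AAPCS)
+     static void g (int);	(may get ARM_PCS_AAPCS_LOCAL if it
+				 never escapes this unit)
+     void h (int) __attribute__ ((pcs ("aapcs-vfp")));	(as requested)
+
+   Anything else simply uses arm_pcs_default.  */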
- if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
- return NULL_RTX;
- return gen_rtx_REG (mode, pcum->nregs);
+static void
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ rtx libcall ATTRIBUTE_UNUSED,
+ const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ /* Record the unallocated VFP registers. */
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
+ pcum->aapcs_vfp_reg_alloc = 0;
}
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ type. If a non-floating point type is found, or if a floating point
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
+ otherwise return the count in the sub-tree. */
static int
-arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, bool named ATTRIBUTE_UNUSED)
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
- int nregs = pcum->nregs;
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
- if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
- return 0;
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (mode != DFmode && mode != SFmode)
+ return -1;
- if (NUM_ARG_REGS > nregs
- && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
- && pcum->can_split)
- return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
+ if (*modep == VOIDmode)
+ *modep = mode;
- return 0;
-}
+ if (*modep == mode)
+ return 1;
-/* Variable sized types are passed by reference. This is a GCC
+ break;
+
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ /* Use V2SImode and V4SImode as representatives of all 64-bit
+ and 128-bit vector types, whether or not those modes are
+ supported with the present options. */
+ size = int_size_in_bytes (type);
+ switch (size)
+ {
+ case 8:
+ mode = V2SImode;
+ break;
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
+ }
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ default:
+ break;
+ }
+
+ return -1;
+}
+
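+/* An illustrative sketch (editor's note, not part of the patch): a few
+ example types and the result of aapcs_vfp_sub_candidate on them,
+ assuming *MODEP starts out as VOIDmode:
+
+ struct s1 { float x, y, z; }; /* 3, *MODEP == SFmode. */
+ struct s2 { double d[2]; }; /* 2, *MODEP == DFmode. */
+ struct s3 { float f; double d; }; /* -1: mixed base modes. */
+ struct s4 { float f; int i; }; /* -1: non-FP member. */
+
+ A count of 1 to 4 with a single base mode is what later makes a type
+ a homogeneous-aggregate candidate for the VFP variant. */
+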
+/* Return true if PCS_VARIANT should use VFP registers. */
+static bool
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
+{
+ if (pcs_variant == ARM_PCS_AAPCS_VFP)
+ return true;
+
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
+ return false;
+
+ return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
+ && (TARGET_VFP_DOUBLE || !is_double));
+}
+
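+/* Return true if an argument or return value of mode MODE (or of type
+ TYPE, if MODE is BLKmode) is a candidate for the VFP registers under
+ PCS_VARIANT. On success set *BASE_MODE and *COUNT to describe the
+ value as COUNT consecutive registers of mode *BASE_MODE. */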
+static bool
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type,
+ enum machine_mode *base_mode, int *count)
+{
+ enum machine_mode new_mode = VOIDmode;
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ *count = 1;
+ new_mode = mode;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ {
+ *count = 2;
+ new_mode = (mode == DCmode ? DFmode : SFmode);
+ }
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
+ {
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
+
+ if (ag_count > 0 && ag_count <= 4)
+ *count = ag_count;
+ else
+ return false;
+ }
+ else
+ return false;
+
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
+ return false;
+
+ *base_mode = new_mode;
+ return true;
+}
+
+static bool
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type)
+{
+ int count ATTRIBUTE_UNUSED;
+ enum machine_mode ag_mode ATTRIBUTE_UNUSED;
+
+ if (!use_vfp_abi (pcs_variant, false))
+ return false;
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
+}
+
+static bool
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
+{
+ if (!use_vfp_abi (pcum->pcs_variant, false))
+ return false;
+
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
+ &pcum->aapcs_vfp_rmode,
+ &pcum->aapcs_vfp_rcount);
+}
+
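+/* Allocate VFP registers for the argument last analyzed by
+ aapcs_vfp_is_call_candidate, using the candidate data recorded in
+ PCUM. Return true and set pcum->aapcs_reg on success; return false
+ if no suitable block of registers remains free. */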
+static bool
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
+ int regno;
+
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
+ {
+ pcum->aapcs_vfp_reg_alloc = mask << regno;
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int i;
+ int rcount = pcum->aapcs_vfp_rcount;
+ int rshift = shift;
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+ rtx par;
+ if (!TARGET_NEON)
+ {
+ /* Avoid using unsupported vector modes. */
+ if (rmode == V2SImode)
+ rmode = DImode;
+ else if (rmode == V4SImode)
+ {
+ rmode = DImode;
+ rcount *= 2;
+ rshift /= 2;
+ }
+ }
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
+ for (i = 0; i < rcount; i++)
+ {
+ rtx tmp = gen_rtx_REG (rmode,
+ FIRST_VFP_REGNUM + regno + i * rshift);
+ tmp = gen_rtx_EXPR_LIST
+ (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (rmode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ pcum->aapcs_reg = par;
+ }
+ else
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
+ return true;
+ }
+ return false;
+}
+
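+/* Return the register, or parallel of registers, used to return a
+ value of mode MODE (or of type TYPE) under PCS_VARIANT's VFP
+ rules. */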
+static rtx
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
+ enum machine_mode mode,
+ const_tree type)
+{
+ if (!use_vfp_abi (pcs_variant, false))
+ return NULL_RTX;
+
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int count;
+ enum machine_mode ag_mode;
+ int i;
+ rtx par;
+ int shift;
+
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
+
+ if (!TARGET_NEON)
+ {
+ if (ag_mode == V2SImode)
+ ag_mode = DImode;
+ else if (ag_mode == V4SImode)
+ {
+ ag_mode = DImode;
+ count *= 2;
+ }
+ }
+ shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
+ for (i = 0; i < count; i++)
+ {
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ return par;
+ }
+
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
+}
+
+static void
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
+ pcum->aapcs_vfp_reg_alloc = 0;
+}
+
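+/* A worked example (editor's sketch, not part of the patch) of the
+ allocate/advance bookkeeping above, for a call such as
+
+ void f (float a, double b, float c);
+
+ under the VFP variant: 'a' takes s0 (mask bit 0); 'b' needs an
+ aligned pair and takes d1, i.e. s2-s3 (mask bits 2-3); 'c' then
+ back-fills the hole, taking s1 (mask bit 1). aapcs_vfp_regs_free
+ goes 0xffff -> 0xfffe -> 0xfff2 -> 0xfff0. */
+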
+#define AAPCS_CP(X) \
+ { \
+ aapcs_ ## X ## _cum_init, \
+ aapcs_ ## X ## _is_call_candidate, \
+ aapcs_ ## X ## _allocate, \
+ aapcs_ ## X ## _is_return_candidate, \
+ aapcs_ ## X ## _allocate_return_reg, \
+ aapcs_ ## X ## _advance \
+ }
+
+/* Table of co-processors that can be used to pass arguments in
+ registers. Ideally no argument should be a candidate for more than
+ one co-processor table entry, but the table is processed in order
+ and stops after the first match. If that entry then fails to put
+ the argument into a co-processor register, the argument will go on
+ the stack. */
+static struct
+{
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
+
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is
+ BLKmode) is a candidate for this co-processor's registers; this
+ function should ignore any position-dependent state in
+ CUMULATIVE_ARGS and only use call-type dependent information. */
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if the argument does get a co-processor register; if
+ so, set aapcs_reg to an RTX for the allocated register, in the
+ form required for a return from FUNCTION_ARG. */
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if a result of mode MODE (or type TYPE if MODE is
+ BLKmode) can be returned in this co-processor's registers. */
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Allocate and return an RTX element to hold the return type of a
+ call; this routine must not fail and will only be called if
+ is_return_candidate returned true with the same parameters. */
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Finish processing this argument and prepare to start processing
+ the next one. */
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
+ {
+ AAPCS_CP(vfp)
+ };
+
+#undef AAPCS_CP
+
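+/* For reference (editor's note): AAPCS_CP(vfp) above expands to the
+ initializer { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
+ aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
+ aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }, so supporting a
+ new co-processor PCS means defining those six hooks and adding one
+ AAPCS_CP line to the table. */
+
+/* Return the index of the co-processor table entry that claims an
+ argument of mode MODE and type TYPE, or -1 if none does. */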
+static int
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type)
+{
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
+ return i;
+
+ return -1;
+}
+
+static int
+aapcs_select_return_coproc (const_tree type, const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
+ TYPE_MODE (type),
+ type))
+ return i;
+ }
+ return -1;
+}
+
+static rtx
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+ const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+ int unsignedp ATTRIBUTE_UNUSED;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ /* Promote integer types. */
+ if (type && INTEGRAL_TYPE_P (type))
+ mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
+ type))
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
+ mode, type);
+ }
+
+ /* Promote small structs returned in a register to full-word size
+ for big-endian AAPCS. */
+ if (type && arm_return_in_msb (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
+
+ return gen_rtx_REG (mode, R0_REGNUM);
+}
+
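+/* Return the rtx for the result location of a libcall of mode MODE,
+ laid out using the default PCS. */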
+rtx
+aapcs_libcall_value (enum machine_mode mode)
+{
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
+}
+
+/* Lay out a function argument using the AAPCS rules. The rule
+ numbers referred to here are those in the AAPCS. */
+static void
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, int named)
+{
+ int nregs, nregs2;
+ int ncrn;
+
+ /* We only need to do this once per argument. */
+ if (pcum->aapcs_arg_processed)
+ return;
+
+ pcum->aapcs_arg_processed = true;
+
+ /* Special case: if named is false then we are handling an incoming
+ anonymous argument which is on the stack. */
+ if (!named)
+ return;
+
+ /* Is this a potential co-processor register candidate? */
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int slot = aapcs_select_call_coproc (pcum, mode, type);
+ pcum->aapcs_cprc_slot = slot;
+
+ /* We don't have to apply any of the rules from part B of the
+ preparation phase; these are handled elsewhere in the
+ compiler. */
+
+ if (slot >= 0)
+ {
+ /* A co-processor register candidate goes either in its own
+ class of registers or on the stack. */
+ if (!pcum->aapcs_cprc_failed[slot])
+ {
+ /* C1.cp - Try to allocate the argument to co-processor
+ registers. */
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
+ return;
+
+ /* C2.cp - Put the argument on the stack and note that we
+ can't assign any more candidates in this slot. We also
+ need to note that we have allocated stack space, so that
+ we won't later try to split a non-cprc candidate between
+ core registers and the stack. */
+ pcum->aapcs_cprc_failed[slot] = true;
+ pcum->can_split = false;
+ }
+
+ /* We didn't get a register, so this argument goes on the
+ stack. */
+ gcc_assert (pcum->can_split == false);
+ return;
+ }
+ }
+
+ /* C3 - For double-word aligned arguments, round the NCRN up to the
+ next even number. */
+ ncrn = pcum->aapcs_ncrn;
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
+ ncrn++;
+
+ nregs = ARM_NUM_REGS2 (mode, type);
+
+ /* Sigh, this test should really assert that nregs > 0, but a GCC
+ extension allows empty structs and then gives them zero size; it
+ then allows such a structure to be passed by value. For some of
+ the code below we have to pretend that such an argument has
+ non-zero size so that we 'locate' it correctly either in
+ registers or on the stack. */
+ gcc_assert (nregs >= 0);
+
+ nregs2 = nregs ? nregs : 1;
+
+ /* C4 - Argument fits entirely in core registers. */
+ if (ncrn + nregs2 <= NUM_ARG_REGS)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = ncrn + nregs;
+ return;
+ }
+
+ /* C5 - Some core registers left and there are no arguments already
+ on the stack: split this argument between the remaining core
+ registers and the stack. */
+ if (ncrn < NUM_ARG_REGS && pcum->can_split)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
+ return;
+ }
+
+ /* C6 - NCRN is set to 4. */
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+
+ /* C7,C8 - argument goes on the stack. We have nothing to do here. */
+ return;
+}
+
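+/* A worked example (editor's sketch) of the core-register rules above,
+ for f (int a, double b, struct { int x[4]; } c) under the base
+ AAPCS: 'a' takes r0 (C4); 'b' is doubleword aligned, so the NCRN is
+ rounded up from 1 to 2 (C3) and it takes r2-r3 (C4); 'c' needs four
+ words with no core registers left, so it goes entirely on the stack
+ (C6, C7/C8). Had 'c' come directly after 'a', rule C5 would have
+ split it between r1-r3 and the stack. */
+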
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is NULL. */
+void
+arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
+ rtx libname,
+ tree fndecl)
+{
+ /* Determine the calling convention (PCS variant) to use. */
+ if (fntype)
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ else
+ pcum->pcs_variant = arm_pcs_default;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ if (arm_libcall_uses_aapcs_base (libname))
+ pcum->pcs_variant = ARM_PCS_AAPCS;
+
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_cprc_slot = -1;
+ pcum->can_split = true;
+
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ {
+ pcum->aapcs_cprc_failed[i] = false;
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
+ }
+ }
+ return;
+ }
+
+ /* Legacy ABIs. */
+
+ /* On the ARM, the offset starts at 0. */
+ pcum->nregs = 0;
+ pcum->iwmmxt_nregs = 0;
+ pcum->can_split = true;
+
+ /* Varargs vectors are treated the same as long long.
+ named_count avoids having to change the way ARM handles 'named'. */
+ pcum->named_count = 0;
+ pcum->nargs = 0;
+
+ if (TARGET_REALLY_IWMMXT && fntype)
+ {
+ tree fn_arg;
+
+ for (fn_arg = TYPE_ARG_TYPES (fntype);
+ fn_arg;
+ fn_arg = TREE_CHAIN (fn_arg))
+ pcum->named_count += 1;
+
+ if (! pcum->named_count)
+ pcum->named_count = INT_MAX;
+ }
+}
+
+/* Return true if mode/type need doubleword alignment. */
+bool
+arm_needs_doubleword_align (enum machine_mode mode, tree type)
+{
+ return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
+ || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
+}
+
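+/* For example (editor's note): PARM_BOUNDARY is 32 on ARM, so DImode
+ and DFmode values (64-bit alignment), and any type the front end
+ aligns to 8 bytes, return true here; this is what steers such
+ arguments into even-numbered register pairs (r0-r1, r2-r3). */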
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+rtx
+arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, int named)
+{
+ int nregs;
+
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of
+ a call insn (op3 of a call_value insn). */
+ if (mode == VOIDmode)
+ return const0_rtx;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_reg;
+ }
+
+ /* Varargs vectors are treated the same as long long.
+ named_count avoids having to change the way ARM handles 'named'. */
+ if (TARGET_IWMMXT_ABI
+ && arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs + 1)
+ {
+ if (pcum->iwmmxt_nregs <= 9)
+ return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
+ else
+ {
+ pcum->can_split = false;
+ return NULL_RTX;
+ }
+ }
+
+ /* Put doubleword aligned quantities in even register pairs. */
+ if (pcum->nregs & 1
+ && ARM_DOUBLEWORD_ALIGN
+ && arm_needs_doubleword_align (mode, type))
+ pcum->nregs++;
+
+ /* Only allow splitting an arg between regs and memory if all preceding
+ args were allocated to regs. For args passed by reference we only count
+ the reference pointer. */
+ if (pcum->can_split)
+ nregs = 1;
+ else
+ nregs = ARM_NUM_REGS2 (mode, type);
+
+ if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
+ return NULL_RTX;
+
+ return gen_rtx_REG (mode, pcum->nregs);
+}
+
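+/* Return the number of bytes of an argument of mode MODE and type
+ TYPE that are passed in registers when the rest of the argument
+ spills to the stack, or zero if no such split occurs. */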
+static int
+arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int nregs = pcum->nregs;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_partial;
+ }
+
+ if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
+ return 0;
+
+ if (NUM_ARG_REGS > nregs
+ && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
+ && pcum->can_split)
+ return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
+
+ return 0;
+}
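+
+/* Editor's sketch of the legacy-ABI split computed above: with
+ pcum->nregs == 3 and an 8-byte (two-register) argument,
+ NUM_ARG_REGS - nregs == 1, so 4 bytes travel in r3 and the
+ remaining 4 bytes go on the stack. */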
+
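+/* Update PCUM to advance past an argument of mode MODE and type TYPE
+ once it has been laid out by arm_function_arg. */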
+void
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, bool named)
+{
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+
+ if (pcum->aapcs_cprc_slot >= 0)
+ {
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
+ type);
+ pcum->aapcs_cprc_slot = -1;
+ }
+
+ /* Generic stuff. */
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ }
+ else
+ {
+ pcum->nargs += 1;
+ if (arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs
+ && TARGET_IWMMXT_ABI)
+ pcum->iwmmxt_nregs += 1;
+ else
+ pcum->nregs += ARM_NUM_REGS2 (mode, type);
+ }
+}
+
+/* Variable sized types are passed by reference. This is a GCC
extension to the ARM ABI. */
static bool
arm_pragma_long_calls = OFF;
}
\f
-/* Table of machine attributes. */
-const struct attribute_spec arm_attribute_table[] =
-{
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
- /* Function calls made to this symbol must be done indirectly, because
- it may lie outside of the 26 bit addressing range of a normal function
- call. */
- { "long_call", 0, 0, false, true, true, NULL },
- /* Whereas these functions are always known to reside within the 26 bit
- addressing range. */
- { "short_call", 0, 0, false, true, true, NULL },
- /* Interrupt Service Routines have special prologue and epilogue requirements. */
- { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
- { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
- { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
-#ifdef ARM_PE
- /* ARM/PE has three new attributes:
- interfacearm - ?
- dllexport - for exporting a function/variable that will live in a dll
- dllimport - for importing a function/variable from a dll
-
- Microsoft allows multiple declspecs in one __declspec, separating
- them with spaces. We do NOT support this. Instead, use __declspec
- multiple times.
- */
- { "dllimport", 0, 0, true, false, false, NULL },
- { "dllexport", 0, 0, true, false, false, NULL },
- { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
-#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
- { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
- { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
- { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
-#endif
- { NULL, 0, 0, false, false, false, NULL }
-};
-
/* Handle an attribute requiring a FUNCTION_DECL;
arguments as in struct attribute_spec.handler. */
static tree
{
if (TREE_CODE (*node) != FUNCTION_DECL)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
}
{
if (TREE_CODE (*node) != FUNCTION_DECL)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
}
/* FIXME: the argument if any is checked for type attributes;
{
if (arm_isr_value (args) == ARM_FT_UNKNOWN)
{
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
*no_add_attrs = true;
}
}
}
else
{
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
}
}
}
return NULL_TREE;
}
+/* Handle a "pcs" attribute; arguments as in struct
+ attribute_spec.handler. */
+static tree
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
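+/* From the user's side (editor's note) the attribute is written on a
+ function type, for example:
+
+ double dot (const float *, const float *, int)
+ __attribute__ ((pcs ("aapcs-vfp")));
+
+ A string that arm_pcs_from_attribute does not recognize makes the
+ attribute be ignored, with the warning above. */
+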
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute. This attribute is another way of
requesting hidden visibility. ARM's compiler supports
/* For "f", be conservative, and only cater for cases in which the
whole of the current function is placed in the same section. */
if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
&& arm_function_in_section_p (decl, current_function_section ()))
return false;
/* Return nonzero if it is ok to make a tail-call to DECL. */
static bool
-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+arm_function_ok_for_sibcall (tree decl, tree exp)
{
unsigned long func_type;
return false;
/* Never tailcall something for which we have no decl, or if we
- are in Thumb mode. */
- if (decl == NULL || TARGET_THUMB)
+ are generating code for Thumb-1. */
+ if (decl == NULL || TARGET_THUMB1)
return false;
/* The PIC register is live on entry to VxWorks PLT entries, so we
if (IS_INTERRUPT (func_type))
return false;
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a VFP register but then need to transfer it to a core
+ register. */
+ rtx a, b;
+
+ a = arm_function_value (TREE_TYPE (exp), decl, false);
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+
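+ /* For example (editor's note): a caller using the default PCS that
+ returns a float in r0 must not sibcall a function declared with
+ __attribute__ ((pcs ("aapcs-vfp"))), which returns in s0; the
+ rtx_equal_p check above rejects exactly that combination. */
+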
/* Never tailcall if function may be called with a misaligned SP. */
if (IS_STACKALIGN (func_type))
return false;
gcc_assert (can_create_pseudo_p ());
if (arm_pic_register != INVALID_REGNUM)
{
- cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
process. */
- if (current_ir_type () != IR_GIMPLE)
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
crtl->uses_pic_offset_table = 1;
}
else
{
rtx seq;
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_reg_rtx (Pmode);
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
process. */
- if (current_ir_type () != IR_GIMPLE)
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
{
crtl->uses_pic_offset_table = 1;
start_sequence ();
seq = get_insns ();
end_sequence ();
- emit_insn_after (seq, entry_of_function ());
+ /* We can be called during expansion of PHI nodes, where
+ we can't yet emit instructions directly in the final
+ insn stream. Queue the insns on the entry edge, they will
+ be committed after everything else is expanded. */
+ insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
}
}
}
{
rtx pic_ref, address;
rtx insn;
- int subregs = 0;
-
- /* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
if (reg == 0)
{
gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
-
- subregs = 1;
- }
-
- if (subregs)
- address = gen_reg_rtx (Pmode);
- else
- address = reg;
-
- if (TARGET_ARM)
- emit_insn (gen_pic_load_addr_arm (address, orig));
- else if (TARGET_THUMB2)
- emit_insn (gen_pic_load_addr_thumb2 (address, orig));
- else /* TARGET_THUMB1 */
- emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+ address = gen_reg_rtx (Pmode);
+ }
+ else
+ address = reg;
/* VxWorks does not impose a fixed gap between segments; the run-time
gap can be different from the object-file gap. We therefore can't
SYMBOL_REF_LOCAL_P (orig)))
&& NEED_GOT_RELOC
&& !TARGET_VXWORKS_RTP)
- pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
+ insn = arm_pic_static_addr (orig, reg);
else
{
+ /* If this function doesn't have a pic register, create one now. */
+ require_pic_register ();
+
+ if (TARGET_32BIT)
+ emit_insn (gen_pic_load_addr_32bit (address, orig));
+ else /* TARGET_THUMB1 */
+ emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+
pic_ref = gen_const_mem (Pmode,
gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
address));
+ insn = emit_move_insn (reg, pic_ref);
}
- insn = emit_move_insn (reg, pic_ref);
-
/* Put a REG_EQUAL note on this insn, so that it can be optimized
by loop. */
set_unique_reg_note (insn, REG_EQUAL, orig);
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
UNSPEC_GOTSYM_OFF);
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
- if (TARGET_ARM)
- {
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
- }
- else if (TARGET_THUMB2)
+ if (TARGET_32BIT)
{
- /* Thumb-2 only allows very limited access to the PC. Calculate the
- address in a temporary register. */
- if (arm_pic_register != INVALID_REGNUM)
- {
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
- }
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+ if (TARGET_ARM)
+ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
else
- {
- gcc_assert (can_create_pseudo_p ());
- pic_tmp = gen_reg_rtx (Pmode);
- }
-
- emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
- emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
}
else /* TARGET_THUMB1 */
{
emit_use (pic_reg);
}
+/* Generate code to load the address of a static var when flag_pic is set. */
+static rtx
+arm_pic_static_addr (rtx orig, rtx reg)
+{
+ rtx l1, labelno, offset_rtx, insn;
+
+ gcc_assert (flag_pic);
+
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition; on Thumb it is 'dot + 4'. */
+ offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
+ UNSPEC_SYMBOL_OFFSET);
+ offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
+
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
+ if (TARGET_ARM)
+ insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
+ else
+ insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ }
+ else /* TARGET_THUMB1 */
+ {
+ emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
+ insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ }
+
+ return insn;
+}
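+
+/* In ARM state the sequence emitted above is roughly (editor's sketch;
+ label and literal names are illustrative):
+
+ ldr r3, .Loffset @ = sym - (.LPIC0 + 8)
+ .LPIC0: add r3, pc, r3 @ r3 = &sym
+
+ relying on the PC reading as '.LPIC0 + 8' in ARM state. */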
/* Return nonzero if X is valid as an ARM state addressing register. */
static int
/* Return nonzero if X is a valid ARM state address operand. */
int
-arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
- int strict_p)
+arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+ int strict_p)
{
bool use_ldrd;
enum rtx_code code = GET_CODE (x);
}
/* Return nonzero if X is a valid Thumb-2 address operand. */
-int
+static int
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
bool use_ldrd;
if (GET_MODE_SIZE (mode) <= 4
&& ! (arm_arch4
&& (mode == HImode
+ || mode == HFmode
|| (mode == QImode && outer == SIGN_EXTEND))))
{
if (code == MULT)
load. */
if (arm_arch4)
{
- if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
+ if (mode == HImode
+ || mode == HFmode
+ || (outer == SIGN_EXTEND && mode == QImode))
range = 256;
else
range = 4096;
}
else
- range = (mode == HImode) ? 4095 : 4096;
+ range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
return (code == CONST_INT
&& INTVAL (index) < range
if (mode == DImode || mode == DFmode)
{
- HOST_WIDE_INT val = INTVAL (index);
- /* ??? Can we assume ldrd for thumb2? */
- /* Thumb-2 ldrd only has reg+const addressing modes. */
- if (code != CONST_INT)
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+ /* ??? Can we assume ldrd for thumb2? */
+ /* Thumb-2 ldrd only has reg+const addressing modes. */
+ /* ldrd supports offsets of +-1020.
+ However the ldr fallback does not. */
+ return val > -256 && val < 256 && (val & 3) == 0;
+ }
+ else
return 0;
-
- /* ldrd supports offsets of +-1020.
- However the ldr fallback does not. */
- return val > -256 && val < 256 && (val & 3) == 0;
}
if (code == MULT)
addresses based on the frame pointer or arg pointer until the
reload pass starts. This is so that eliminating such addresses
into stack based ones won't produce impossible code. */
-int
+static int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
/* ??? Not clear if this is right. Experiment. */
return 1;
/* This is PC relative data after arm_reorg runs. */
- else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
+ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
+ && reload_completed
&& (GET_CODE (x) == LABEL_REF
|| (GET_CODE (x) == CONST
&& GET_CODE (XEXP (x, 0)) == PLUS
}
}
+bool
+arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ if (TARGET_ARM)
+ return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
+ else if (TARGET_THUMB2)
+ return thumb2_legitimate_address_p (mode, x, strict_p);
+ else /* if (TARGET_THUMB1) */
+ return thumb1_legitimate_address_p (mode, x, strict_p);
+}
+
/* Build the SYMBOL_REF for __tls_get_addr. */
static GTY(()) rtx tls_get_addr_libfunc;
if (TARGET_ARM)
emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
else if (TARGET_THUMB2)
- {
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- }
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
else /* TARGET_THUMB1 */
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
if (TARGET_ARM)
emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
else if (TARGET_THUMB2)
- {
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- emit_move_insn (reg, gen_const_mem (SImode, reg));
- }
+ emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
else
{
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
+ if (!TARGET_ARM)
+ {
+ /* TODO: legitimize_address for Thumb2. */
+ if (TARGET_THUMB2)
+ return x;
+ return thumb_legitimize_address (x, orig_x, mode);
+ }
+
if (arm_tls_symbol_p (x))
return legitimize_tls_address (x, NULL_RTX);
}
/* XXX We don't allow MINUS any more -- see comment in
- arm_legitimate_address_p (). */
+ arm_legitimate_address_outer_p (). */
else if (GET_CODE (x) == MINUS)
{
rtx xop0 = XEXP (x, 0);
x = copy_rtx (x);
push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
- Pmode, VOIDmode, 0, 0, opnum, type);
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
return x;
}
x = copy_rtx (x);
push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
- Pmode, VOIDmode, 0, 0, opnum, type);
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
return x;
}
else if ((outer == PLUS || outer == COMPARE)
&& INTVAL (x) < 256 && INTVAL (x) > -256)
return 0;
- else if (outer == AND
+ else if ((outer == IOR || outer == XOR || outer == AND)
&& INTVAL (x) < 256 && INTVAL (x) >= -256)
return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
else if (outer == ASHIFT || outer == ASHIFTRT
|| outer == LSHIFTRT)
return 0;
}
}
-
-/* Worker routine for arm_rtx_costs. */
-/* ??? This needs updating for thumb2. */
-static inline int
-arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
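+/* Worker routine for arm_rtx_costs. Compute the cost of X into *TOTAL;
+ return true if the cost is final, or false if the caller should also
+ add the costs of X's operands. */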
+static inline bool
+arm_rtx_costs_1 (rtx x, enum rtx_code outer, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
- int extra_cost;
+ rtx operand;
+ enum rtx_code code = GET_CODE (x);
+ *total = 0;
switch (code)
{
case MEM:
/* Memory costs quite a lot for the first word, but subsequent words
load at the equivalent of a single insn each. */
- return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
- + (GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
+ *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
+ return true;
case DIV:
case MOD:
case UDIV:
case UMOD:
- return optimize_size ? COSTS_N_INSNS (2) : 100;
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ *total = COSTS_N_INSNS (2);
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = COSTS_N_INSNS (20);
+ return false;
case ROTATE:
- if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
- return 4;
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = rtx_cost (XEXP (x, 1), code, speed);
+
/* Fall through */
case ROTATERT:
if (mode != SImode)
- return 8;
+ {
+ *total += COSTS_N_INSNS (4);
+ return true;
+ }
+
/* Fall through */
case ASHIFT: case LSHIFTRT: case ASHIFTRT:
+ *total += rtx_cost (XEXP (x, 0), code, speed);
if (mode == DImode)
- return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
- + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 8));
+ {
+ *total += COSTS_N_INSNS (3);
+ return true;
+ }
- extra_cost = 1;
+ *total += COSTS_N_INSNS (1);
/* Increase the cost of complex shifts because they aren't any faster,
and reduce dual issue opportunities. */
if (arm_tune_cortex_a9
&& outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
- extra_cost++;
-
- return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 4)
- + ((GET_CODE (XEXP (x, 1)) == REG
- || (GET_CODE (XEXP (x, 1)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
- || (GET_CODE (XEXP (x, 1)) == CONST_INT))
- ? 0 : 4));
+ ++*total;
+
+ return true;
case MINUS:
- if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
+ if (TARGET_THUMB2)
{
- extra_cost = rtx_cost (XEXP (x, 1), code, true);
- if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (20);
+ }
+ else
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ /* Thumb2 does not have RSB, so all arguments must be
+ registers (subtracting a constant is canonicalized as
+ addition of the negated constant). */
+ return false;
}
if (mode == DImode)
- return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
- ? 0 : 8));
+ {
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 0))))
- ? 0 : 8));
-
- if (((GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))
- && REG_OR_SUBREG_REG (XEXP (x, 1))))
- || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
- || subcode == ASHIFTRT || subcode == LSHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
- (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
- && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
- || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
- && REG_OR_SUBREG_REG (XEXP (x, 0))))
- return 1;
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 0)))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 1));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ /* A shift as a part of RSB costs no more than RSB itself. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+
+ return true;
+ }
+
/* Fall through */
case PLUS:
- if (arm_arch6 && mode == SImode
+ if (code == PLUS && arm_arch6 && mode == SImode
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
- return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
- + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
+ {
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
+ speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- if (GET_CODE (XEXP (x, 0)) == MULT)
+ /* MLA: All arguments must be registers. We filter out
+ multiplication by a power of two, so that we fall down into
+ the code below. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
- extra_cost = rtx_cost (XEXP (x, 0), code, true);
- if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
+ /* The cost comes from the cost of the multiply. */
+ return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8));
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+ return true;
+ }
/* Fall through */
+
case AND: case XOR: case IOR:
- extra_cost = 0;
/* Normally the frame registers will be spilt into reg+const during
reload, so it is a bad idea to combine them with other instructions,
&& GET_CODE (XEXP (x, 1)) != CONST_INT)
|| (REG_OR_SUBREG_REG (XEXP (x, 0))
&& ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
- extra_cost = 4;
+ *total = COSTS_N_INSNS (1);
if (mode == DImode)
- return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 8));
-
- if (REG_OR_SUBREG_REG (XEXP (x, 0)))
- return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 4));
-
- else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
- return (1 + extra_cost
- + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
- || subcode == LSHIFTRT || subcode == ASHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
- && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))
- && !arm_tune_cortex_a9)
- || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
- ? 0 : 4));
+ {
+ *total += COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
- return 8;
+ return false;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == UMIN || subcode == UMAX
+ || subcode == SMIN || subcode == SMAX)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
+
+ return false;
case MULT:
/* This should have been handled by the CPU specific routines. */
== GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
- return 8;
- return 99;
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
+ return false;
case NEG:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
/* Fall through */
case NOT:
- if (mode == DImode)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (mode == SImode && code == NOT)
+ {
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT
+ || (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ /* Register shifts cost an extra cycle. */
+ if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
+ subcode, speed);
+ return true;
+ }
+ }
- return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
+ return false;
case IF_THEN_ELSE:
if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
- return 14;
- return 2;
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+
+ operand = XEXP (x, 0);
+
+ if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
+ && GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == CC_REGNUM))
+ *total += COSTS_N_INSNS (1);
+ *total += (rtx_cost (XEXP (x, 1), code, speed)
+ + rtx_cost (XEXP (x, 2), code, speed));
+ return true;
+
+ case NE:
+ if (mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case GE:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case LT:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case EQ:
+ case GT:
+ case LE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
+ scc_insn:
+ /* SCC insns. If the comparison has already been performed, they
+ cost 2 instructions. Otherwise they need an additional comparison
+ before them. */
+ *total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ return true;
+ }
+ /* Fall through */
case COMPARE:
- return 1;
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ *total = 0;
+ return true;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ return false;
+
+ case UMIN:
+ case UMAX:
+ case SMIN:
+ case SMAX:
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
case ABS:
- return 4 + (mode == DImode ? 4 : 0);
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+ *total = COSTS_N_INSNS (1);
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (3);
+ return false;
case SIGN_EXTEND:
- if (arm_arch_thumb2 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ *total = 0;
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
+
+ if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ if (arm_arch6)
+ {
+ if (GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (1);
+ }
+ else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (2);
+ }
+
+ return false;
+ }
- if (GET_MODE (XEXP (x, 0)) == QImode)
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
/* Fall through */
case ZERO_EXTEND:
- if (arm_arch6 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
-
- switch (GET_MODE (XEXP (x, 0)))
+ *total = 0;
+ if (GET_MODE_CLASS (mode) == MODE_INT)
{
- case QImode:
- return (1 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
- case HImode:
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ if (arm_arch6)
+ {
+ if (GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (1);
+ }
+ else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
+ 1 : 2);
+ }
- case SImode:
- return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ return false;
+ }
+ switch (GET_MODE (XEXP (x, 0)))
+ {
case V8QImode:
case V4HImode:
case V2SImode:
case V4QImode:
case V2HImode:
- return 1;
+ *total = COSTS_N_INSNS (1);
+ return false;
default:
gcc_unreachable ();
}
gcc_unreachable ();
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+
case CONST_INT:
- if (const_ok_for_arm (INTVAL (x)))
- return outer == SET ? 2 : -1;
- else if (outer == AND
- && const_ok_for_arm (~INTVAL (x)))
- return -1;
- else if ((outer == COMPARE
- || outer == PLUS || outer == MINUS)
- && const_ok_for_arm (-INTVAL (x)))
- return -1;
+ if (const_ok_for_arm (INTVAL (x))
+ || const_ok_for_arm (~INTVAL (x)))
+ *total = COSTS_N_INSNS (1);
else
- return 5;
+ *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
+ INTVAL (x), NULL_RTX,
+ NULL_RTX, 0, 0));
+ return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
- return 6;
+ *total = COSTS_N_INSNS (3);
+ return true;
case HIGH:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
case LO_SUM:
- return (outer == SET) ? 1 : -1;
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
case CONST_DOUBLE:
- if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
- return outer == SET ? 2 : -1;
- else if ((outer == COMPARE || outer == PLUS)
- && neg_const_double_rtx_ok_for_fpa (x))
- return -1;
- return 7;
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ default:
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+}
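+
+/* A note on the scale used above (editor's addition): COSTS_N_INSNS (n)
+ is GCC's standard "cost of n instructions" unit (n * 4 with the
+ default definition), so the DIV case above reports the equivalent of
+ 20 instructions while a simple shift reports 1. */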
+
+/* Estimate the size cost of Thumb-1 instructions.
+ For now most of the code is copied from thumb1_rtx_costs. We need
+ more fine-grained tuning when we have more related test cases. */
+static inline int
+thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ case PLUS:
+ case MINUS:
+ case COMPARE:
+ case NEG:
+ case NOT:
+ return COSTS_N_INSNS (1);
+
+ case MULT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ /* The Thumb-1 mul instruction cannot operate on a constant; we
+ must load the constant into a register first. */
+ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
+ return COSTS_N_INSNS (1) + const_size;
+ }
+ return COSTS_N_INSNS (1);
+
+ case SET:
+ return (COSTS_N_INSNS (1)
+ + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ + (GET_CODE (SET_DEST (x)) == MEM)));
+
+ case CONST_INT:
+ if (outer == SET)
+ {
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+ return 0;
+ if (thumb_shiftable_const (INTVAL (x)))
+ return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
+ }
+ else if ((outer == PLUS || outer == COMPARE)
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+ return 0;
+ else if ((outer == IOR || outer == XOR || outer == AND)
+ && INTVAL (x) < 256 && INTVAL (x) >= -256)
+ return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
+ else if (outer == ASHIFT || outer == ASHIFTRT
+ || outer == LSHIFTRT)
+ return 0;
+ return COSTS_N_INSNS (2);
+
+ case CONST:
+ case CONST_DOUBLE:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return COSTS_N_INSNS (3);
+
+ case UDIV:
+ case UMOD:
+ case DIV:
+ case MOD:
+ return 100;
+
+ case TRUNCATE:
+ return 99;
+
+ case AND:
+ case XOR:
+ case IOR:
+ /* XXX guess. */
+ return 8;
+
+ case MEM:
+ /* XXX another guess. */
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ ? 4 : 0));
+
+ case IF_THEN_ELSE:
+ /* XXX a guess. */
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ return 14;
+ return 2;
+
+ case ZERO_EXTEND:
+ /* XXX still guessing. */
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case QImode:
+ return (1 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ case HImode:
+ return (4 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ case SImode:
+ return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ default:
+ return 99;
+ }
default:
return 99;
/* RTX costs when optimizing for size. */
static bool
-arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total)
{
enum machine_mode mode = GET_MODE (x);
-
if (TARGET_THUMB1)
{
- /* XXX TBD. For now, use the standard costs. */
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_size_rtx_costs (x, code, outer_code);
return true;
}
return false;
case MINUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case PLUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
}
+ /* A shift as part of ADD costs nothing: e.g. on ARM,
+ "add r0, r1, r2, lsl #2" is a single instruction. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
+ *total += rtx_cost (XEXP (x, 1), code, false);
+ return true;
+ }
+
/* Fall through */
case AND: case XOR: case IOR:
if (mode == SImode)
return false;
case NEG:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case ABS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
case CONST_INT:
if (const_ok_for_arm (INTVAL (x)))
- *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
+ /* A multiplication by a constant requires another instruction
+ to load the constant into a register. */
+ *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
+ ? 1 : 0);
else if (const_ok_for_arm (~INTVAL (x)))
*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
else if (const_ok_for_arm (-INTVAL (x)))
/* RTX costs when optimizing for size. */
static bool
-arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+arm_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
if (!speed)
- return arm_size_rtx_costs (x, code, outer_code, total);
+ return arm_size_rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code, total);
else
- return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
+ return current_tune->rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code,
+ total, speed);
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
static bool
-arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
+ *total = COSTS_N_INSNS (cost);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
- *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* RTX cost for cores with a fast multiply unit (M variants). */
static bool
-arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS (cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ /* Requires a libcall. */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
so it can be ignored. */
static bool
-arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
switch (code)
{
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) != MULT)
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+
+ /* A COMPARE of a MULT is slow on XScale; the muls instruction
+ will stall until the multiplication is complete. */
+ *total = COSTS_N_INSNS (3);
+ return false;
+
case MULT:
/* There is no point basing this on the tuning, since it is always the
fast variant if it exists at all. */
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
- & (unsigned HOST_WIDE_INT) 0xffffffff);
- int cost, const_ok = const_ok_for_arm (i);
+ /* If operand 1 is a constant we can more accurately
+ calculate the cost of the multiply. The multiplier can
+ retire 15 bits on the first cycle and a further 12 on the
+ second. We do, of course, have to load the constant into
+ a register first. */
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+ /* There's a general overhead of one cycle. */
+ int cost = 1;
unsigned HOST_WIDE_INT masked_const;
- /* The cost will be related to two insns.
- First a load of the constant (MOV or LDR), then a multiply. */
- cost = 2;
- if (! const_ok)
- cost += 1; /* LDR is probably more expensive because
- of longer result latency. */
+ if (i & 0x80000000)
+ i = ~i;
+
+ i &= (unsigned HOST_WIDE_INT) 0xffffffff;
+
masked_const = i & 0xffff8000;
- if (masked_const != 0 && masked_const != 0xffff8000)
+ if (masked_const != 0)
{
+ cost++;
masked_const = i & 0xf8000000;
- if (masked_const == 0 || masked_const == 0xf8000000)
- cost += 1;
- else
- cost += 2;
+ if (masked_const != 0)
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS (cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (3);
+ return false;
+ }
- case COMPARE:
- /* A COMPARE of a MULT is slow on XScale; the muls instruction
- will stall until the multiplication is complete. */
- if (GET_CODE (XEXP (x, 0)) == MULT)
- *total = 4 + rtx_cost (XEXP (x, 0), code, true);
- else
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ /* Requires a libcall. */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* RTX costs for 9e (and later) cores. */
static bool
-arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
- int nonreg_cost;
- int cost;
if (TARGET_THUMB1)
{
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 3;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- {
- *total = 30;
- return true;
- }
if (mode == DImode)
{
- cost = 7;
- nonreg_cost = 8;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
- else
+
+ if (mode == SImode)
{
- cost = 2;
- nonreg_cost = 4;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
- *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* All address computations that can be done are free, but rtx cost returns
if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
return 10;
- if (c == PLUS || c == MINUS)
+ if (c == PLUS)
{
- if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
return 2;
if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
init_fp_table ();
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- r = REAL_VALUE_NEGATE (r);
+ r = real_value_negate (&r);
if (REAL_VALUE_MINUS_ZERO (r))
return 0;
/* Extract sign, exponent and mantissa. */
sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
- r = REAL_VALUE_ABS (r);
+ r = real_value_abs (&r);
exponent = REAL_EXP (&r);
/* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
highest (sign) bit, with a fixed binary point at bit point_pos.
break; \
}
- unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
+ unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
unsigned char bytes[16];
int immtype = -1, matches;
operands would use up an additional register and take an extra instruction,
for no particular gain. */
-void
-neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
- rtx (*reduc) (rtx, rtx, rtx))
+void
+neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
+ rtx (*reduc) (rtx, rtx, rtx))
+{
+ enum machine_mode inner = GET_MODE_INNER (mode);
+ unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
+ rtx tmpsum = op1;
+
+ for (i = parts / 2; i >= 1; i /= 2)
+ {
+ rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
+ emit_insn (reduc (dest, tmpsum, tmpsum));
+ tmpsum = dest;
+ }
+}
+
+/* If VALS is a vector constant that can be loaded into a register
+ using VDUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX. */
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
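+ /* vdup from a core register handles elements of at most 4 bytes
+ (vdup.8/.16/.32), so wider inner modes cannot be splatted here. */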
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+ return NULL_RTX;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ /* The elements are not all the same. We could handle repeating
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using
+ vdup.i16). */
+ return NULL_RTX;
+
+ /* We can load this constant by using VDUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that cannot be converted into a CONST_VECTOR. */
+
+rtx
+neon_make_constant (rtx vals)
{
- enum machine_mode inner = GET_MODE_INNER (mode);
- unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
- rtx tmpsum = op1;
+ enum machine_mode mode = GET_MODE (vals);
+ rtx target;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
- for (i = parts / 2; i >= 1; i /= 2)
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
{
- rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
- emit_insn (reduc (dest, tmpsum, tmpsum));
- tmpsum = dest;
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
}
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */
+ return const_vec;
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
+ pipeline cycle; creating the constant takes one or two ARM
+ pipeline cycles. */
+ return target;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. On Cortex-A8 this takes two cycles
+ (for either double or quad vectors). We cannot take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing
+ mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We cannot construct an initializer. */
+ return NULL_RTX;
}
-/* Initialize a vector with non-constant elements. FIXME: We can do better
- than the current implementation (building a vector on the stack and then
- loading it) in many cases. See rs6000.c. */
+/* Initialize vector TARGET to VALS. */
void
neon_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner = GET_MODE_INNER (mode);
- unsigned int i, n_elts = GET_MODE_NUNITS (mode);
- rtx mem;
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ rtx constant = neon_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtvec ops;
- gcc_assert (VECTOR_MODE_P (mode));
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ neon_expand_vector_init (target, copy);
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ ops = gen_rtvec (3, x, target, GEN_INT (one_var));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
+ return;
+ }
+
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
- XVECEXP (vals, 0, i));
-
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
}
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
- to/from. If CORE is true, we're moving from ARM registers not Neon
- registers. */
+ to/from. TYPE is one of the following values:
+ 0 - Vector load/store (vldr)
+ 1 - Core registers (ldm)
+ 2 - Element/structure loads (vld1)
+ */
int
-neon_vector_mem_operand (rtx op, bool core)
+neon_vector_mem_operand (rtx op, int type)
{
rtx ind;
return arm_address_register_rtx_p (ind, 0);
/* Allow post-increment with Neon registers. */
- if (!core && GET_CODE (ind) == POST_INC)
+ if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
-#if 0
- /* FIXME: We can support this too if we use VLD1/VST1. */
- if (!core
- && GET_CODE (ind) == POST_MODIFY
- && arm_address_register_rtx_p (XEXP (ind, 0), 0)
- && GET_CODE (XEXP (ind, 1)) == PLUS
- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
- ind = XEXP (ind, 1);
-#endif
+ /* FIXME: vld1 allows register post-modify. */
/* Match:
(plus (reg)
(const)). */
- if (!core
+ if (type == 0
&& GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
+ if (mode == HFmode)
+ {
+ if (!TARGET_NEON_FP16)
+ return GENERAL_REGS;
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
+ return NO_REGS;
+ return GENERAL_REGS;
+ }
+
if (TARGET_NEON
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
- && neon_vector_mem_operand (x, FALSE))
+ && neon_vector_mem_operand (x, 0))
return NO_REGS;
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
}
}
-/* Must not copy a SET whose source operand is PC-relative. */
+/* Must not copy any rtx that uses a pc-relative address. */
+
+static int
+arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_PIC_BASE)
+ return 1;
+ return 0;
+}
static bool
arm_cannot_copy_insn_p (rtx insn)
{
- rtx pat = PATTERN (insn);
-
- if (GET_CODE (pat) == SET)
- {
- rtx rhs = SET_SRC (pat);
-
- if (GET_CODE (rhs) == UNSPEC
- && XINT (rhs, 1) == UNSPEC_PIC_BASE)
- return TRUE;
-
- if (GET_CODE (rhs) == MEM
- && GET_CODE (XEXP (rhs, 0)) == UNSPEC
- && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
- return TRUE;
- }
-
- return FALSE;
+ return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
enum rtx_code
/* Don't accept any offset that will require multiple
instructions to handle, since this would cause the
arith_adjacentmem pattern to output an overlong sequence. */
- if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
+ if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
return 0;
/* Don't allow an eliminable register: register elimination can make
return 0;
}
+/* Return true iff it would be profitable to turn a sequence of NOPS loads
+ or stores (depending on IS_STORE) into a load-multiple or store-multiple
+ instruction. ADD_OFFSET is nonzero if the base address register needs
+ to be modified with an add instruction before we can use it. */
+
+static bool
+multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
+ int nops, HOST_WIDE_INT add_offset)
+{
+ /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
+ if the offset isn't small enough. The reason 2 ldrs are faster
+ is because these ARMs are able to do more than one cache access
+ in a single cycle. The ARM9 and StrongARM have Harvard caches,
+ whilst the ARM8 has a double bandwidth cache. This means that
+ these cores can do both an instruction fetch and a data fetch in
+ a single cycle, so the trick of calculating the address into a
+ scratch register (one of the result regs) and then doing a load
+ multiple actually becomes slower (and no smaller in code size).
+ That is the transformation
+
+ ldr rd1, [rbase + offset]
+ ldr rd2, [rbase + offset + 4]
+
+ to
+
+ add rd1, rbase, offset
+ ldmia rd1, {rd1, rd2}
+
+ produces worse code -- '3 cycles + any stalls on rd2' instead of
+ '2 cycles + any stalls on rd2'. On ARMs with only one cache
+ access per cycle, the first sequence could never complete in less
+ than 6 cycles, whereas the ldm sequence would only take 5 and
+ would make better use of sequential accesses if not hitting the
+ cache.
+
+ We cheat here and test 'arm_ld_sched' which we currently know to
+ only be true for the ARM8, ARM9 and StrongARM. If this ever
+ changes, then the test below needs to be reworked. */
+ if (nops == 2 && arm_ld_sched && add_offset != 0)
+ return false;
+
+ return true;
+}
+
+/* Subroutine of load_multiple_sequence and store_multiple_sequence.
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
+ an array ORDER describing the sequence in which to access the offsets
+ so that they come out in ascending order. In this sequence, each
+ offset must be larger by exactly 4 than the previous one. ORDER[0]
+ must have been filled in with the lowest offset by the caller.
+ If UNSORTED_REGS is nonnull, it is an array of register numbers that
+ we use to verify that ORDER produces an ascending order of registers.
+ Return true if it was possible to construct such an order, false if
+ not. */
+
+static bool
+compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
+ int *unsorted_regs)
+{
+ int i;
+ for (i = 1; i < nops; i++)
+ {
+ int j;
+
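+ /* Use the previous entry as a sentinel: if the inner loop finds
+ no offset exactly 4 above the previous one, order[i] is left
+ equal to order[i - 1] and we fail below. */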
+ order[i] = order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
+ {
+ /* We must find exactly one offset that is higher than the
+ previous one by 4. */
+ if (order[i] != order[i - 1])
+ return false;
+ order[i] = j;
+ }
+ if (order[i] == order[i - 1])
+ return false;
+ /* The register numbers must be ascending. */
+ if (unsorted_regs != NULL
+ && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
+ return false;
+ }
+ return true;
+}
+
int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
HOST_WIDE_INT *load_offset)
{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
int base_reg = -1;
- int i;
+ int i, ldm_case;
- /* Can only handle 2, 3, or 4 insns at present,
- though could be easily extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
- memset (order, 0, 4 * sizeof (int));
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
== CONST_INT)))
{
if (i == 0)
- {
- base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
- }
+ base_reg = REGNO (reg);
else
{
if (base_reg != (int) REGNO (reg))
/* Not addressed from the same base register. */
return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
}
+ unsorted_regs[i] = (GET_CODE (operands[i]) == REG
+ ? REGNO (operands[i])
+ : REGNO (SUBREG_REG (operands[i])));
/* If it isn't an integer register, or if it overwrites the
base register but isn't the last insn in the list, then
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
-
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
-
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
-
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
+ return 0;
if (base)
{
}
if (unsorted_offsets[order[0]] == 0)
- return 1; /* ldmia */
-
- if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
- return 2; /* ldmib */
-
- if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* ldmda */
-
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* ldmdb */
-
- /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
- if the offset isn't small enough. The reason 2 ldrs are faster
- is because these ARMs are able to do more than one cache access
- in a single cycle. The ARM9 and StrongARM have Harvard caches,
- whilst the ARM8 has a double bandwidth cache. This means that
- these cores can do both an instruction fetch and a data fetch in
- a single cycle, so the trick of calculating the address into a
- scratch register (one of the result regs) and then doing a load
- multiple actually becomes slower (and no smaller in code size).
- That is the transformation
-
- ldr rd1, [rbase + offset]
- ldr rd2, [rbase + offset + 4]
-
- to
-
- add rd1, rbase, offset
- ldmia rd1, {rd1, rd2}
-
- produces worse code -- '3 cycles + any stalls on rd2' instead of
- '2 cycles + any stalls on rd2'. On ARMs with only one cache
- access per cycle, the first sequence could never complete in less
- than 6 cycles, whereas the ldm sequence would only take 5 and
- would make better use of sequential accesses if not hitting the
- cache.
+ ldm_case = 1; /* ldmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ ldm_case = 2; /* ldmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ ldm_case = 3; /* ldmda */
+ else if (unsorted_offsets[order[nops - 1]] == -4)
+ ldm_case = 4; /* ldmdb */
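+ /* Otherwise the base register must first be adjusted with a
+ separate add/sub, which is only worthwhile when that
+ adjustment fits in a single instruction. */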
+ else if (const_ok_for_arm (unsorted_offsets[order[0]])
+ || const_ok_for_arm (-unsorted_offsets[order[0]]))
+ ldm_case = 5;
+ else
+ return 0;
- We cheat here and test 'arm_ld_sched' which we currently know to
- only be true for the ARM8, ARM9 and StrongARM. If this ever
- changes, then the test below needs to be reworked. */
- if (nops == 2 && arm_ld_sched)
+ if (!multiple_operation_profitable_p (false, nops,
+ ldm_case == 5
+ ? unsorted_offsets[order[0]] : 0))
return 0;
- /* Can't do it without setting up the offset, only do this if it takes
- no more than one insn. */
- return (const_ok_for_arm (unsorted_offsets[order[0]])
- || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
+ return ldm_case;
}
const char *
emit_ldm_seq (rtx *operands, int nops)
{
- int regs[4];
+ int regs[MAX_LDM_STM_OPS];
int base_reg;
HOST_WIDE_INT offset;
char buf[100];
store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
HOST_WIDE_INT * load_offset)
{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
int base_reg = -1;
- int i;
+ int i, stm_case;
- /* Can only handle 2, 3, or 4 insns at present, though could be easily
- extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
- memset (order, 0, 4 * sizeof (int));
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
&& (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
== CONST_INT)))
{
+ unsorted_regs[i] = (GET_CODE (operands[i]) == REG
+ ? REGNO (operands[i])
+ : REGNO (SUBREG_REG (operands[i])));
if (i == 0)
- {
- base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
- }
- else
- {
- if (base_reg != (int) REGNO (reg))
- /* Not addressed from the same base register. */
- return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
- }
+ base_reg = REGNO (reg);
+ else if (base_reg != (int) REGNO (reg))
+ /* Not addressed from the same base register. */
+ return 0;
/* If it isn't an integer register, then we can't do this. */
if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
-
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
-
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
-
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
+ return 0;
if (base)
{
}
if (unsorted_offsets[order[0]] == 0)
- return 1; /* stmia */
-
- if (unsorted_offsets[order[0]] == 4)
- return 2; /* stmib */
-
- if (unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* stmda */
+ stm_case = 1; /* stmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ stm_case = 2; /* stmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ stm_case = 3; /* stmda */
+ else if (unsorted_offsets[order[nops - 1]] == -4)
+ stm_case = 4; /* stmdb */
+ else
+ return 0;
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* stmdb */
+ if (!multiple_operation_profitable_p (true, nops, 0))
+ return 0;
- return 0;
+ return stm_case;
}
const char *
emit_stm_seq (rtx *operands, int nops)
{
- int regs[4];
+ int regs[MAX_LDM_STM_OPS];
int base_reg;
HOST_WIDE_INT offset;
char buf[100];
/* A compare with a shifted operand. Because of canonicalization, the
comparison will have to be swapped when we emit the assembler. */
- if (GET_MODE (y) == SImode && GET_CODE (y) == REG
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|| GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
|| GET_CODE (x) == ROTATERT))
/* This operation is performed swapped, but since we only rely on the Z
flag we don't need an additional mode. */
- if (GET_MODE (y) == SImode && REG_P (y)
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& GET_CODE (x) == NEG
&& (op == EQ || op == NE))
return CC_Zmode;
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ - (count * 8)))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
}
par = emit_insn (par);
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
RTX_FRAME_RELATED_P (par) = 1;
return count * 8;
return "";
}
-/* Output a 'call' insn that is a reference in memory. */
+/* Output a 'call' insn that is a reference in memory. This is
+ disabled on ARMv5 and later, where we prefer to load the address
+ into a register and use blx, because the sequence below carries a
+ significant performance overhead. */
const char *
output_call_mem (rtx *operands)
{
- if (TARGET_INTERWORK && !arm_arch5)
+ gcc_assert (!arm_arch5);
+ if (TARGET_INTERWORK)
{
output_asm_insn ("ldr%?\t%|ip, %0", operands);
output_asm_insn ("mov%?\t%|lr, %|pc", operands);
first instruction. It's safe to use IP as the target of the
load since the call will kill it anyway. */
output_asm_insn ("ldr%?\t%|ip, %0", operands);
- if (arm_arch5)
- output_asm_insn ("blx%?\t%|ip", operands);
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ if (arm_arch4t)
+ output_asm_insn ("bx%?\t%|ip", operands);
else
- {
- output_asm_insn ("mov%?\t%|lr, %|pc", operands);
- if (arm_arch4t)
- output_asm_insn ("bx%?\t%|ip", operands);
- else
- output_asm_insn ("mov%?\t%|pc, %|ip", operands);
- }
+ output_asm_insn ("mov%?\t%|pc, %|ip", operands);
}
else
{
return "";
}
-
-/* Emit a MOVW/MOVT pair. */
-void arm_emit_movpair (rtx dest, rtx src)
-{
- emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
- emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
-}
-
+void
+arm_emit_movpair (rtx dest, rtx src)
+{
+ /* If the src is an immediate, simplify it. */
+ if (CONST_INT_P (src))
+ {
+ HOST_WIDE_INT val = INTVAL (src);
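+ /* Emit the low 16 bits as a plain move; if the high 16 bits are
+ nonzero, write them through a ZERO_EXTRACT of the destination,
+ which matches the movt pattern. */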
+ emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
+ if ((val >> 16) & 0x0000ffff)
+ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
+ GEN_INT (16)),
+ GEN_INT ((val >> 16) & 0x0000ffff));
+ return;
+ }
+ emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
+ emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
+}
/* Output a move from arm registers to an fpa registers.
OPERANDS[0] is an fpa register.
}
else
{
- /* IWMMXT allows offsets larger than ldrd can handle,
- fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
- && (INTVAL(otherops[2]) <= -256
- || INTVAL(otherops[2]) >= 256))
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can
+ handle, fix these up with a pair of ldr. */
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
+ else
{
output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
- otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
}
- else
- output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
}
}
else
{
- /* IWMMXT allows offsets larger than ldrd can handle,
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
- && (INTVAL(otherops[2]) <= -256
- || INTVAL(otherops[2]) >= 256))
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
+ else
{
- otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
- otherops[0] = operands[0];
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
}
- else
- /* We only allow constant increments, so this is safe. */
- output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
}
break;
operands[1] = otherops[0];
if (TARGET_LDRD
&& (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
&& INTVAL (otherops[2]) > -256
&& INTVAL (otherops[2]) < 256)))
/* IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
+ if (!TARGET_THUMB2
+ && GET_CODE (otherops[2]) == CONST_INT
&& (INTVAL(otherops[2]) <= -256
|| INTVAL(otherops[2]) >= 256))
{
- rtx reg1;
- reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
{
output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
- otherops[0] = reg1;
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
}
else
{
- otherops[0] = reg1;
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
- otherops[0] = operands[1];
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
}
}
}
if (TARGET_LDRD
&& (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
&& INTVAL (otherops[2]) > -256
&& INTVAL (otherops[2]) < 256)))
default:
otherops[0] = adjust_address (operands[0], SImode, 4);
- otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
+ otherops[1] = operands[1];
output_asm_insn ("str%?\t%1, %0", operands);
- output_asm_insn ("str%?\t%1, %0", otherops);
+ output_asm_insn ("str%?\t%H1, %0", otherops);
}
}
}
/* Output a move, load or store for quad-word vectors in ARM registers. Only
- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
const char *
output_move_quad (rtx *operands)
ops[1] = reg;
break;
+ case PRE_DEC:
+ /* FIXME: Should we be using vld1/vst1 here in big-endian mode? */
+ templ = "v%smdb%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
case POST_MODIFY:
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
gcc_unreachable ();
{
/* We're only using DImode here because it's a convenient size. */
ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
- ops[1] = adjust_address (mem, SImode, 8 * i);
+ ops[1] = adjust_address (mem, DImode, 8 * i);
if (reg_overlap_mentioned_p (ops[0], mem))
{
gcc_assert (overlap == -1);
&& crtl->uses_pic_offset_table)
save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
}
+ else if (IS_VOLATILE (func_type))
+ {
+ /* For noreturn functions we historically omitted register saves
+ altogether. However this really messes up debugging. As a
+ compromise save just the frame pointers. Combined with the link
+ register saved elsewhere this should be sufficient to get
+ a backtrace. */
+ if (frame_pointer_needed)
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
+ }
else
{
/* In the normal case we only need to save those registers
| (1 << LR_REGNUM)
| (1 << PC_REGNUM);
- /* Volatile functions do not return, so there
- is no need to save any other registers. */
- if (IS_VOLATILE (func_type))
- return save_reg_mask;
-
save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
/* Decide if we need to save the link register.
sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
- return_used_this_function = 1;
+ cfun->machine->return_used_this_function = 1;
offsets = arm_get_frame_offsets ();
live_regs_mask = offsets->saved_regs_mask;
gcc_assert (stack_adjust == 0 || stack_adjust == 4);
if (stack_adjust && arm_arch5 && TARGET_ARM)
- sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
+ {
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
+ }
else
{
/* If we can't use ldmib (SA110 bug),
then try to pop r3 instead. */
if (stack_adjust)
live_regs_mask |= 1 << 3;
- sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
+
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
}
}
else
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "pop%s\t{", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
p = instr + strlen (instr);
if (crtl->calls_eh_return)
asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
- return_used_this_function = 0;
}
const char *
/* If we have already generated the return instruction
then it is futile to generate anything else. */
- if (use_return_insn (FALSE, sibling) && return_used_this_function)
+ if (use_return_insn (FALSE, sibling) &&
+ (cfun->machine->return_used_this_function != 0))
return "";
func_type = arm_current_func_type ();
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
int vfp_offset = offsets->frame;
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
}
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
if (TARGET_HARD_FLOAT && TARGET_VFP)
{
- start_reg = FIRST_VFP_REGNUM;
- for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
+ int end_reg = LAST_VFP_REGNUM + 1;
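+ /* END_REG is one past the top of the contiguous block of live
+ register pairs currently being accumulated. */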
+
+ /* Scan the registers in reverse order. We need to match
+ any groupings made in the prologue and generate matching
+ pop operations. */
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
{
if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
- && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
+ && (!df_regs_ever_live_p (reg + 1)
+ || call_used_regs[reg + 1]))
{
- if (start_reg != reg)
+ if (end_reg > reg + 2)
vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
- start_reg = reg + 2;
+ (reg + 2 - FIRST_VFP_REGNUM) / 2,
+ (end_reg - (reg + 2)) / 2);
+ end_reg = reg;
}
- }
- if (start_reg != reg)
- vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
+ }
+ if (end_reg > reg + 2)
+ vfp_output_fldmd (f, SP_REGNUM, 0,
+ (end_reg - (reg + 2)) / 2);
}
+
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
/* ??? Probably not safe to set this here, since it assumes that a
function will be emitted as assembly immediately after we generate
RTL for it. This does not happen for inline functions. */
- return_used_this_function = 0;
+ cfun->machine->return_used_this_function = 0;
}
else /* TARGET_32BIT */
{
offsets = arm_get_frame_offsets ();
gcc_assert (!use_return_insn (FALSE, NULL)
- || !return_used_this_function
+ || (cfun->machine->return_used_this_function != 0)
|| offsets->saved_regs == offsets->outgoing_args
|| frame_pointer_needed);
/* For the body of the insn we are going to generate an UNSPEC in
parallel with several USEs. This allows the insn to be recognized
- by the push_multi pattern in the arm.md file. The insn looks
- something like this:
+ by the push_multi pattern in the arm.md file.
+
+ The body of the insn looks something like this:
(parallel [
- (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
+ (set (mem:BLK (pre_modify:SI (reg:SI sp)
+ (const_int:SI <num>)))
(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
- (use (reg:SI 11 fp))
- (use (reg:SI 12 ip))
- (use (reg:SI 14 lr))
- (use (reg:SI 15 pc))
+ (use (reg:SI XX))
+ (use (reg:SI YY))
+ ...
])
For the frame note however, we try to be more explicit and actually
(sequence [
(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
(set (mem:SI (reg:SI sp)) (reg:SI r4))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
+ ...
])
- This sequence is used both by the code to support stack unwinding for
- exceptions handlers and the code to generate dwarf2 frame debugging. */
+ FIXME: In an ideal world the PRE_MODIFY would not exist and
+ instead we'd have a parallel expression detailing all
+ the stores to the various memory addresses so that debug
+ information is more up-to-date. Remember, however, to take care
+ of the constraints of the push instruction when doing so.
+
+ Note also that this has to be taken care of for the VFP registers.
+
+ For more see PR43399. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -4 * num_regs))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
{
tmp
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (SImode,
- plus_constant (stack_pointer_rtx,
- 4 * j)),
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx,
+ 4 * j)),
reg);
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, 0) = tmp;
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
return par;
}
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -12 * count))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
XVECEXP (dwarf, 0, 0) = tmp;
par = emit_insn (par);
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
return par;
}
{
int reg = -1;
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
- {
- if ((offsets->saved_regs_mask & (1 << i)) == 0)
- {
- reg = i;
- break;
- }
- }
-
- if (reg == -1 && arm_size_return_regs () <= 12
- && !crtl->tail_call_emit)
+ /* If it is safe to use r3, then do so. This sometimes
+ generates better code on Thumb-2 by avoiding the need to
+ use 32-bit push/pop instructions. */
+ if (!crtl->tail_call_emit
+ && arm_size_return_regs () <= 12)
{
- /* Push/pop an argument register (r3) if all callee saved
- registers are already being pushed. */
reg = 3;
}
+ else
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+ {
+ if ((offsets->saved_regs_mask & (1 << i)) == 0)
+ {
+ reg = i;
+ break;
+ }
+ }
if (reg != -1)
{
}
}
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ Note that ARG_POINTER_REGNUM and
+ HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
+ pointer, we must eliminate FRAME_POINTER_REGNUM into
+ HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
+ ARG_POINTER_REGNUM. */
+
+bool
+arm_can_eliminate (const int from, const int to)
+{
+ return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
+ (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
+ (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
+ (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
+ true);
+}
/* Emit RTL to save coprocessor registers on function entry. Returns the
number of bytes pushed. */
for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
{
- insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
insn = gen_rtx_MEM (V2SImode, insn);
insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
function. */
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
- insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
insn = gen_rtx_MEM (XFmode, insn);
insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
RTX_FRAME_RELATED_P (insn) = 1;
dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
plus_constant (stack_pointer_rtx, amount));
RTX_FRAME_RELATED_P (dwarf) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
RTX_FRAME_RELATED_P (insn) = 1;
dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
insn = gen_movsi (r0, stack_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- dwarf, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
emit_insn (insn);
emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
emit_insn (gen_movsi (stack_pointer_rtx, r1));
plus_constant (stack_pointer_rtx,
-fp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- dwarf, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
else
{
{
REAL_VALUE_TYPE r;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- r = REAL_VALUE_NEGATE (r);
+ r = real_value_negate (&r);
fprintf (stream, "%s", fp_const_from_val (&r));
}
return;
}
return;
+ /* Print the high single-precision register of a VFP double-precision
+ register. */
+ case 'p':
+ {
+ enum machine_mode mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
+ }
+ return;
+
/* Print a VFP/Neon double precision or quad precision register name. */
case 'P':
case 'q':
}
return;
+ /* Memory operand for vld1/vst1 instruction. */
+ case 'A':
+ {
+ rtx addr;
+ bool postinc = FALSE;
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ if (GET_CODE (addr) == POST_INC)
+ {
+ postinc = TRUE;
+ addr = XEXP (addr, 0);
+ }
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ if (postinc)
+ fputs("!", stream);
+ }
+ return;
+
+ /* Translate an S register number into a D register number and element index. */
+ case 'y':
+ {
+ enum machine_mode mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+ }
+ return;
+
+ /* Register specifier for vld1.16/vst1.16. Translate the S register
+ number into a D register number and element index. */
+ case 'z':
+ {
+ enum machine_mode mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
+ }
+ return;
+
default:
if (x == 0)
{
default:
gcc_assert (GET_CODE (x) != NEG);
fputc ('#', stream);
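+ /* A HIGH here is the movw half of a movw/movt pair (see
+ arm_emit_movpair); it supplies the lower 16 bits of the
+ symbol, with the upper half provided by a following LO_SUM. */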
+ if (GET_CODE (x) == HIGH)
+ {
+ fputs (":lower16:", stream);
+ x = XEXP (x, 0);
+ }
+
output_addr_const (stream, x);
break;
}
get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
- int code;
+ enum arm_cond_code code;
enum rtx_code comp_code = GET_CODE (comparison);
if (GET_MODE_CLASS (mode) != MODE_CC)
reversed if it appears to fail. */
int reverse = 0;
- /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
- taken are clobbered, even if the rtl suggests otherwise. It also
- means that we have to grub around within the jump expression to find
- out what the conditions are when the jump isn't taken. */
- int jump_clobbers = 0;
-
/* If we start with a return insn, we only succeed if we find another one. */
int seeking_return = 0;
int then_not_else = TRUE;
rtx this_insn = start_insn, label = 0;
- /* If the jump cannot be done with one instruction, we cannot
- conditionally execute the instruction in the inverse case. */
- if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
- {
- jump_clobbers = 1;
- return;
- }
-
/* Register the insn jumped to. */
if (reverse)
{
control falls in from somewhere else. */
if (this_insn == label)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
this_insn = next_nonnote_insn (this_insn);
if (this_insn && this_insn == label)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
if (this_insn && this_insn == label
&& insns_skipped < max_insns_skipped)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
}
arm_target_insn = this_insn;
}
- if (jump_clobbers)
- {
- gcc_assert (!reverse);
- arm_current_cc =
- get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
- 0), 0), 1));
- if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
- if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
- }
- else
- {
- /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
- what it was. */
- if (!reverse)
- arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
- 0));
- }
+
+ /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
+ what it was. */
+ if (!reverse)
+ arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
if (reverse || then_not_else)
arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
if (mode == DFmode)
return VFP_REGNO_OK_FOR_DOUBLE (regno);
+ /* VFP registers can hold HFmode values, but there is no point in
+ putting them there unless we have hardware conversion insns. */
+ if (mode == HFmode)
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
+
if (TARGET_NEON)
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|| (VALID_NEON_QREG_MODE (mode)
return VALID_IWMMXT_REG_MODE (mode);
}
- /* We allow any value to be stored in the general registers.
+ /* We allow almost any value to be stored in the general registers.
Restrict doubleword quantities to even register pairs so that we can
- use ldrd. Do not allow Neon structure opaque modes in general registers;
- they would use too many. */
+ use ldrd. Do not allow very large Neon structure opaque modes in
+ general registers; they would use too many. */
if (regno <= LAST_ARM_REGNUM)
return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
- && !VALID_NEON_STRUCT_MODE (mode);
+ && ARM_NUM_REGS (mode) <= 4;
if (regno == FRAME_POINTER_REGNUM
|| regno == ARG_POINTER_REGNUM)
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
not used in arm mode. */
-int
+
+enum reg_class
arm_regno_class (int regno)
{
if (TARGET_THUMB1)
{
#define IWMMXT_BUILTIN(code, string, builtin) \
{ FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, 0, 0 },
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
#define IWMMXT_BUILTIN2(code, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
+ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
TREE_READONLY (decl) = 1;
}
-typedef enum {
+enum neon_builtin_type_bits {
T_V8QI = 0x0001,
T_V4HI = 0x0002,
T_V2SI = 0x0004,
T_TI = 0x0400,
T_EI = 0x0800,
T_OI = 0x1000
-} neon_builtin_type_bits;
+};
#define v8qi_UP T_V8QI
#define v4hi_UP T_V4HI
typedef struct {
const char *name;
const neon_itype itype;
- const neon_builtin_type_bits bits;
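+  /* Table entries combine several of the T_* flags above with
+     bitwise OR, hence plain int rather than the enum type.  */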
+ const int bits;
const enum insn_code codes[T_MAX];
const unsigned int num_vars;
unsigned int base_fcode;
}
static void
+arm_init_fp16_builtins (void)
+{
+ tree fp16_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (fp16_type) = 16;
+ layout_type (fp16_type);
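+  /* Expose the type to source code under the name "__fp16"; after
+     this, declarations such as "__fp16 x;" are accepted.  */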
+ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+}
+
+static void
arm_init_builtins (void)
{
arm_init_tls_builtins ();
if (TARGET_NEON)
arm_init_neon_builtins ();
+
+ if (arm_fp16_format)
+ arm_init_fp16_builtins ();
+}
+
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
+
+static const char *
+arm_invalid_parameter_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("function parameters cannot have __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_INVALID_RETURN_TYPE. */
+
+static const char *
+arm_invalid_return_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("functions cannot return __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_PROMOTED_TYPE. */
+
+static tree
+arm_promoted_type (const_tree t)
+{
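+  /* __fp16 has no arithmetic of its own: expressions involving it
+     are promoted to float, so e.g. the sum of two __fp16 values is
+     computed in float and only narrowed again on assignment.  */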
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return float_type_node;
+ return NULL_TREE;
+}
+
+/* Implement TARGET_CONVERT_TO_TYPE.
+ Specifically, this hook implements the peculiarity of the ARM
+   half-precision floating-point C semantics that requires conversions
+   between __fp16 and double to go through an intermediate conversion
+   to float.  */
+
+static tree
+arm_convert_to_type (tree type, tree expr)
+{
+ tree fromtype = TREE_TYPE (expr);
+ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
+ return NULL_TREE;
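+  /* E.g. (double) h, for __fp16 h, is in effect expanded as
+     (double) (float) h, and likewise in the narrowing direction.  */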
+ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
+ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
+ return convert (type, convert (float_type_node, expr));
+ return NULL_TREE;
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
+ This simply adds HFmode as a supported mode; even though we don't
+ implement arithmetic on this type directly, it's supported by
+ optabs conversions, much the way the double-word arithmetic is
+ special-cased in the default hook. */
+
+static bool
+arm_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (mode == HFmode)
+ return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
+ else
+ return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
for (;;)
{
- builtin_arg thisarg = va_arg (ap, int);
+ builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
if (thisarg == NEON_ARG_STOP)
break;
if (push && pushed_words && dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
int pushed_mask = real_regs;
*cfa_offset += pushed_words * 4;
unsigned HOST_WIDE_INT mask = 0xff;
int i;
+  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
if (val == 0) /* XXX */
return 0;
int had_to_push_lr;
int size;
- if (return_used_this_function)
+ if (cfun->machine->return_used_this_function != 0)
return "";
if (IS_NAKED (arm_current_func_type ()))
}
}
+/* Given the stack offsets and register mask in OFFSETS, decide
+ how many additional registers to push instead of subtracting
+ a constant from SP. */
+static int
+thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
+{
+ HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
+ unsigned long live_regs_mask = offsets->saved_regs_mask;
+  /* Extract a mask of the registers (r0-r7 and lr) that the Thumb
+     push instruction can handle directly.  */
+  unsigned long l_mask = live_regs_mask & 0x40ff;
+  /* Then count how many other high registers (r8-r11) will need to
+     be pushed.  */
+  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
+ int n_free;
+
+  /* If the stack frame size is 512 exactly (just out of reach of a
+     single SP-adjusting instruction), we can save one load
+     instruction, which should make this a win even when optimizing
+     for speed.  */
+  if (!optimize_size && amount != 512)
+ return 0;
+
+ /* Can't do this if there are high registers to push, or if we
+ are not going to do a push at all. */
+ if (high_regs_pushed != 0 || l_mask == 0)
+ return 0;
+
+ /* Don't do this if thumb1_expand_prologue wants to emit instructions
+ between the push and the stack frame allocation. */
+ if ((flag_pic && arm_pic_register != INVALID_REGNUM)
+ || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
+ return 0;
+
+ for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
+ n_free++;
+
+ if (n_free == 0)
+ return 0;
+ gcc_assert (amount / 4 * 4 == amount);
+
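+  /* For example, if AMOUNT is 512 and at least one low register is
+     free, pushing (512 - 508) / 4 == 1 extra register leaves 508 to
+     subtract, which fits a single SP adjustment.  */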
+ if (amount >= 512 && (amount - n_free * 4) < 512)
+ return (amount - 508) / 4;
+ if (amount <= n_free * 4)
+ return amount / 4;
+ return 0;
+}
+
/* Generate the rest of a function's prologue. */
void
thumb1_expand_prologue (void)
stack_pointer_rtx);
amount = offsets->outgoing_args - offsets->saved_regs;
+ amount -= 4 * thumb1_extra_regs_pushed (offsets);
if (amount)
{
if (amount < 512)
plus_constant (stack_pointer_rtx,
-amount));
RTX_FRAME_RELATED_P (dwarf) = 1;
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
}
the stack pointer. */
if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
cfa_offset = cfa_offset + crtl->args.pretend_args_size;
dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
cfa_offset = cfa_offset + 16;
dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
register. */
else if ((l_mask & 0xff) != 0
|| (high_regs_pushed == 0 && l_mask))
- thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
+ {
+ unsigned long mask = l_mask;
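+      /* Fold the lowest free registers into the push as scratch;
+         the stack allocation done in thumb1_expand_prologue is
+         reduced to match (see thumb1_extra_regs_pushed).  */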
+ mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
+ thumb_pushpop (f, mask, 1, &cfa_offset, mask);
+ }
if (high_regs_pushed)
{
}
else
{
- int set_float_abi_attributes = 0;
- switch (arm_fpu_arch)
- {
- case FPUTYPE_FPA:
- fpu_name = "fpa";
- break;
- case FPUTYPE_FPA_EMU2:
- fpu_name = "fpe2";
- break;
- case FPUTYPE_FPA_EMU3:
- fpu_name = "fpe3";
- break;
- case FPUTYPE_MAVERICK:
- fpu_name = "maverick";
- break;
- case FPUTYPE_VFP:
- fpu_name = "vfp";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3D16:
- fpu_name = "vfpv3-d16";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3:
- fpu_name = "vfpv3";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON:
- fpu_name = "neon";
- set_float_abi_attributes = 1;
- break;
- default:
- abort();
- }
- if (set_float_abi_attributes)
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
if (TARGET_HARD_FLOAT)
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_ABI_FP_16bit_format. */
+ if (arm_fp16_format)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
+ (int)arm_fp16_format);
+
if (arm_lang_output_object_attributes_hook)
arm_lang_output_object_attributes_hook();
}
return 1;
}
+/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
+ HFmode constant pool entries are actually loaded with ldr. */
+void
+arm_emit_fp16_const (rtx c)
+{
+ REAL_VALUE_TYPE r;
+ long bits;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c);
+ bits = real_to_target (NULL, &r, HFmode);
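+  /* Pad the halfword out to a full word so that the ldr loading the
+     pool entry sees the value in its low-order half: emit the zeros
+     first on big-endian words, after the value otherwise.  */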
+ if (WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
+ if (!WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+}
+
const char *
arm_output_load_gr (rtx *operands)
{
that way. */
static void
-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
enum machine_mode mode,
tree type,
int *pretend_size,
int second_time ATTRIBUTE_UNUSED)
{
- int nregs = cum->nregs;
- if (nregs & 1
- && ARM_DOUBLEWORD_ALIGN
- && arm_needs_doubleword_align (mode, type))
- nregs++;
-
+ int nregs;
+
cfun->machine->uses_anonymous_args = 1;
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ nregs = pcum->aapcs_ncrn;
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
+ nregs++;
+ }
+ else
+ nregs = pcum->nregs;
+
if (nregs < NUM_ARG_REGS)
*pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
return !TARGET_AAPCS_BASED;
}
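+
+/* Implement TARGET_PROMOTE_FUNCTION_MODE.  Sub-word integer values
+   are widened to SImode, so e.g. a "short" argument or return value
+   is passed as a full word.  */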
+static enum machine_mode
+arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ return SImode;
+
+ return mode;
+}
/* AAPCS based ABIs use short enums by default. */
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode))
return true;
- if ((mode == V2SImode)
- || (mode == V4HImode)
- || (mode == V8QImode))
+ if ((TARGET_NEON || TARGET_IWMMXT)
+ && ((mode == V2SImode)
+ || (mode == V4HImode)
+ || (mode == V8QImode)))
return true;
return false;
}
+/* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P. */
+bool
+arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return TARGET_THUMB1;
+}
+
/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
ARM insns and therefore guarantee that the shift count is modulo 256.
DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
if (IS_FPA_REGNUM (regno))
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
- /* FIXME: VFPv3 register numbering. */
if (IS_VFP_REGNUM (regno))
- return 64 + regno - FIRST_VFP_REGNUM;
+ {
+ /* See comment in arm_dwarf_register_span. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return 64 + regno - FIRST_VFP_REGNUM;
+ else
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2;
+ }
if (IS_IWMMXT_GR_REGNUM (regno))
return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
gcc_unreachable ();
}
+/* DWARF models VFPv3 registers as 32 64-bit registers.
+   GCC models them as 64 32-bit registers, so we need to describe this to
+   the DWARF generation code.  Other registers can use the default.  */
+static rtx
+arm_dwarf_register_span (rtx rtl)
+{
+ unsigned regno;
+ int nregs;
+ int i;
+ rtx p;
+
+ regno = REGNO (rtl);
+ if (!IS_VFP_REGNUM (regno))
+ return NULL_RTX;
+
+ /* XXX FIXME: The EABI defines two VFP register ranges:
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
+ 256-287: D0-D31
+ The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
+ corresponding D register. Until GDB supports this, we shall use the
+ legacy encodings. We also use these encodings for D0-D15 for
+ compatibility with older debuggers. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return NULL_RTX;
+
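+  /* For example, a DFmode value in d16 becomes a one-element
+     PARALLEL naming DWARF register 272 (256 + 16).  */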
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
+ regno = (regno - FIRST_VFP_REGNUM) / 2;
+ for (i = 0; i < nregs; i++)
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+
+ return p;
+}
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
offset = INTVAL (XEXP (e1, 1));
asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
HARD_FRAME_POINTER_REGNUM, reg,
- INTVAL (XEXP (e1, 1)));
+ offset);
}
else if (GET_CODE (e1) == REG)
{
rtx val;
val = XVECEXP (x, 0, 0);
- reloc = INTVAL (XVECEXP (x, 0, 1));
+ reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
output_addr_const (fp, val);
fputc (')', fp);
return TRUE;
}
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
+ {
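+      /* Emit "symbol - (base)"; with GOT_PCREL the symbol term gets
+         "+." appended so that the difference is PC-relative.  */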
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ if (GOT_PCREL)
+ fputs ("+.", fp);
+ fputs ("-(", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 1));
+ fputc (')', fp);
+ return TRUE;
+ }
else if (GET_CODE (x) == CONST_VECTOR)
return arm_emit_vector_const (fp, x);
return "";
}
+/* Output a Thumb-1 casesi dispatch sequence. */
+const char *
+thumb1_output_casesi (rtx *operands)
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[0]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
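+  /* The bl targets are per-mode helper routines that libgcc is
+     expected to provide (__gnu_thumb1_case_*); each uses its return
+     address to locate and index the dispatch table that follows the
+     call.  */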
+  switch (GET_MODE (diff_vec))
+ {
+ case QImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
+ case HImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
+ case SImode:
+ return "bl\t%___gnu_thumb1_case_si";
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Output a Thumb-2 casesi instruction. */
const char *
thumb2_output_casesi (rtx *operands)
{
arm_mangle_map_entry *pos = arm_mangle_map;
+  /* The ARM ABI documents (10th October 2008) say that "__va_list"
+     has to be mangled as if it is in the "std" namespace.  */
+ if (TARGET_AAPCS_BASED
+ && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
+ {
+ static bool warned;
+ if (!warned && warn_psabi && !in_system_header)
+ {
+ warned = true;
+ inform (input_location,
+ "the mangling of %<va_list%> has changed in GCC 4.4");
+ }
+ return "St9__va_list";
+ }
+
+  /* Half-precision float: mangled as "Dh", the encoding the C++ ABI
+     assigns to IEEE 754r half precision.  */
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ return "Dh";
+
if (TREE_CODE (type) != VECTOR_TYPE)
return NULL;
flag_section_anchors = 2;
}
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+bool
+arm_frame_pointer_required (void)
+{
+ return (cfun->has_nonlocal_label
+ || SUBTARGET_FRAME_POINTER_REQUIRED
+ || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
+}
+
+/* Thumb-1 is the only variant that lacks conditional execution, so
+   return true unless the target is Thumb-1.  */
+static bool
+arm_have_conditional_execution (void)
+{
+ return !TARGET_THUMB1;
+}
+
#include "gt-arm.h"