/* Output routines for GCC for ARM.
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
-#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "recog.h"
+#include "cgraph.h"
#include "ggc.h"
#include "except.h"
-#include "c-pragma.h"
+#include "c-family/c-pragma.h" /* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
+#include "intl.h"
+#include "libfuncs.h"
+#include "params.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;
-const struct attribute_spec arm_attribute_table[];
-
void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations. */
+static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
+static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
+static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
+static void arm_print_operand (FILE *, rtx, int);
+static void arm_print_operand_address (FILE *, rtx);
+static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
+static enum machine_mode arm_promote_function_mode (const_tree,
+ enum machine_mode, int *,
+ const_tree, int);
+static bool arm_return_in_memory (const_tree, const_tree);
+static rtx arm_function_value (const_tree, const_tree, bool);
+static rtx arm_libcall_value (enum machine_mode, const_rtx);
+
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
-static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
-static bool arm_size_rtx_costs (rtx, int, int, int *);
-static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
-static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
-static bool arm_xscale_rtx_costs (rtx, int, int, int *);
-static bool arm_9e_rtx_costs (rtx, int, int, int *);
-static int arm_address_cost (rtx);
+static bool arm_have_conditional_execution (void);
+static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
+static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
+static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_rtx_costs (rtx, int, int, int *, bool);
+static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
-static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
+static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+ const_tree);
+static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
-#ifdef TARGET_UNWIND_INFO
+static bool arm_return_in_memory (const_tree, const_tree);
+#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
+static void arm_asm_emit_except_personality (rtx);
+static void arm_asm_init_sections (void);
#endif
+static enum unwind_info_type arm_except_unwind_info (void);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
+static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
+static tree arm_build_builtin_va_list (void);
+static void arm_expand_builtin_va_start (tree, rtx);
+static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static void arm_option_override (void);
+static void arm_option_optimization (int, int);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
+static const char *arm_invalid_parameter_type (const_tree t);
+static const char *arm_invalid_return_type (const_tree t);
+static tree arm_promoted_type (const_tree t);
+static tree arm_convert_to_type (tree type, tree expr);
+static bool arm_scalar_mode_supported_p (enum machine_mode);
+static bool arm_frame_pointer_required (void);
+static bool arm_can_eliminate (const int, const int);
+static void arm_asm_trampoline_template (FILE *);
+static void arm_trampoline_init (rtx, tree, rtx);
+static rtx arm_trampoline_adjust_address (rtx);
+static rtx arm_pic_static_addr (rtx orig, rtx reg);
+static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
+static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
+static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
+static bool arm_class_likely_spilled_p (reg_class_t);
+static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
+static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type,
+ int misalignment,
+ bool is_packed);
+
+\f
+/* Table of machine attributes. */
+static const struct attribute_spec arm_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 26 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL },
+ /* Whereas these functions are always known to reside within the 26 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL },
+ /* Specify the procedure call conventions for a function. */
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
+ /* Interrupt Service Routines have special prologue and epilogue requirements. */
+ { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#ifdef ARM_PE
+ /* ARM/PE has three new attributes:
+ interfacearm - ?
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times.
+ */
+ { "dllimport", 0, 0, true, false, false, NULL },
+ { "dllexport", 0, 0, true, false, false, NULL },
+ { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
\f
/* Initialize the GCC target structure. */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND arm_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
+
#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arm_option_override
+#undef TARGET_OPTION_OPTIMIZATION
+#define TARGET_OPTION_OPTIMIZATION arm_option_optimization
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arm_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE arm_libcall_value
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
-/* This will be overridden in arm_override_options. */
#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
+#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs
-#undef TARGET_PROMOTE_FUNCTION_ARGS
-#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
-#undef TARGET_PROMOTE_FUNCTION_RETURN
-#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arm_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
+
#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY arm_return_in_memory
+
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
-#ifdef TARGET_UNWIND_INFO
-#undef TARGET_UNWIND_EMIT
-#define TARGET_UNWIND_EMIT arm_unwind_emit
+#if ARM_UNWIND_INFO
+#undef TARGET_ASM_UNWIND_EMIT
+#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
/* EABI unwinding tables use a different format for the typeinfo tables. */
#undef TARGET_ASM_TTYPE
#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
-#endif /* TARGET_UNWIND_INFO */
+
+#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
+#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
+
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
+#endif /* ARM_UNWIND_INFO */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
+
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#define TARGET_HAVE_TLS true
#endif
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 4095
+
+/* The minimum is set such that the total size of the block
+ for a particular anchor is -4088 + 1 + 4095 bytes, which is
+ divisible by eight, ensuring natural spacing of anchors. */
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET -4088
+
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
+
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
+
+#undef TARGET_INVALID_PARAMETER_TYPE
+#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
+
+#undef TARGET_INVALID_RETURN_TYPE
+#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
+
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE arm_promoted_type
+
+#undef TARGET_CONVERT_TO_TYPE
+#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arm_can_eliminate
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+ arm_vector_alignment_reachable
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+ arm_builtin_support_vector_misalignment
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Obstack for minipool constant handling. */
/* True if we are currently building a constant table. */
int making_const_table;
-/* Define the information needed to generate branch insns. This is
- stored from the compare operation. */
-rtx arm_compare_op0, arm_compare_op1;
-
/* The processor for which instructions should be scheduled. */
enum processor_type arm_tune = arm_none;
-/* The default processor used if not overridden by commandline. */
-static enum processor_type arm_default_cpu = arm_none;
-
-/* Which floating point model to use. */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. */
-enum fputype arm_fpu_arch;
+/* The current tuning set. */
+const struct tune_params *current_tune;
/* Which floating point hardware to schedule for. */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating popint hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware. */
enum float_abi_type arm_float_abi;
+/* Which __fp16 format to use. */
+enum arm_fp16_format_type arm_fp16_format;
+
/* Which ABI to use. */
enum arm_abi_type arm_abi;
#define FL_DIV (1 << 18) /* Hardware divide. */
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
#define FL_NEON (1 << 20) /* Neon instructions. */
+#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
+ architecture. */
+#define FL_ARCH7 (1 << 22) /* Architecture 7. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
+/* Flags that only effect tuning, not available instructions. */
+#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
+ | FL_CO_PROC)
+
#define FL_FOR_ARCH2 FL_NOTM
#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
+#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
instructions we are allowed to generate. */
/* Nonzero if this chip supports the ARM 6K extensions. */
int arm_arch6k = 0;
+/* Nonzero if this chip supports the ARM 7 extensions. */
+int arm_arch7 = 0;
+
/* Nonzero if instructions not present in the 'M' profile can be used. */
int arm_arch_notm = 0;
+/* Nonzero if instructions present in ARMv7E-M can be used. */
+int arm_arch7em = 0;
+
/* Nonzero if this chip can benefit from load scheduling. */
int arm_ld_sched = 0;
This typically means an ARM6 or ARM7 with MMU or MPU. */
int arm_tune_wbuf = 0;
+/* Nonzero if tuning for Cortex-A9. */
+int arm_tune_cortex_a9 = 0;
+
/* Nonzero if generating Thumb instructions. */
int thumb_code = 0;
+/* Nonzero if generating Thumb-1 instructions. */
+int thumb1_code = 0;
+
/* Nonzero if we should define __THUMB_INTERWORK__ in the
preprocessor.
XXX This is a bit of a hack, it's intended to help work around
/* Nonzero if chip supports integer division instruction. */
int arm_arch_hwdiv;
-/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
- must report the mode of the memory reference from PRINT_OPERAND to
- PRINT_OPERAND_ADDRESS. */
+/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
+ we must report the mode of the memory reference from
+ TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
enum machine_mode output_memory_reference_mode;
/* The register number to be used for the PIC offset register. */
unsigned arm_pic_register = INVALID_REGNUM;
-/* Set to 1 when a return insn is output, this means that the epilogue
- is not needed. */
-int return_used_this_function;
-
/* Set to 1 after arm_reorg has started. Reset to start at the start of
the next function. */
static int after_arm_reorg = 0;
-/* The maximum number of insns to be used when loading a constant. */
-static int arm_constant_limit = 3;
+enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below. */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
enum arm_cond_code arm_current_cc;
+
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn. */
"hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
+/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
+int arm_regs_in_sequence[] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)
enum processor_type core;
const char *arch;
const unsigned long flags;
- bool (* rtx_costs) (rtx, int, int, int *);
+ const struct tune_params *const tune;
};
+const struct tune_params arm_slowmul_tune =
+{
+ arm_slowmul_rtx_costs,
+ NULL,
+ 3
+};
+
+const struct tune_params arm_fastmul_tune =
+{
+ arm_fastmul_rtx_costs,
+ NULL,
+ 1
+};
+
+const struct tune_params arm_xscale_tune =
+{
+ arm_xscale_rtx_costs,
+ xscale_sched_adjust_cost,
+ 2
+};
+
+const struct tune_params arm_9e_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1
+};
+
+const struct tune_params arm_cortex_a9_tune =
+{
+ arm_9e_rtx_costs,
+ cortex_a9_sched_adjust_cost,
+ 1
+};
+
+
/* Not all of these give usefully different compilation alternatives,
but there is no simple way of generalizing them. */
static const struct processors all_cores[] =
{
/* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
- {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
+ {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
{NULL, arm_none, NULL, 0, NULL}
static const struct processors all_architectures[] =
{
/* ARM Architectures */
- /* We don't specify rtx_costs here as it will be figured out
+ /* We don't specify tuning costs here as it will be figured out
from the core. */
{"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
{"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
+ {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
{"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
{"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
+ {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
{NULL, arm_none, NULL, 0 , NULL}
};
-struct arm_cpu_select
-{
- const char * string;
- const char * name;
- const struct processors * processors;
-};
-
-/* This is a magic structure. The 'string' field is magically filled in
- with a pointer to the value specified by the user on the command line
- assuming that the user has specified such a value. */
-
-static struct arm_cpu_select arm_select[] =
-{
- /* string name processors */
- { NULL, "-mcpu=", all_cores },
- { NULL, "-march=", all_architectures },
- { NULL, "-mtune=", all_cores }
-};
-/* Defines representing the indexes into the above table. */
-#define ARM_OPT_SET_CPU 0
-#define ARM_OPT_SET_ARCH 1
-#define ARM_OPT_SET_TUNE 2
+/* These are populated as commandline arguments are processed, or NULL
+ if not specified. */
+static const struct processors *arm_selected_arch;
+static const struct processors *arm_selected_cpu;
+static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture. */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
-struct fpu_desc
-{
- const char * name;
- enum fputype fpu;
-};
-
-
/* Available values for -mfpu=. */
-static const struct fpu_desc all_fpus[] =
-{
- {"fpa", FPUTYPE_FPA},
- {"fpe2", FPUTYPE_FPA_EMU2},
- {"fpe3", FPUTYPE_FPA_EMU2},
- {"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP},
- {"vfp3", FPUTYPE_VFP3},
- {"neon", FPUTYPE_NEON}
-};
-
-
-/* Floating point models used by the different hardware.
- See fputype in arm.h. */
-
-static const enum fputype fp_model_for_fpu[] =
-{
- /* No FP hardware. */
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
};
+struct fp16_format
+{
+ const char *name;
+ enum arm_fp16_format_type fp16_format_type;
+};
+
+
+/* Available values for -mfp16-format=. */
+
+static const struct fp16_format all_fp16_formats[] =
+{
+ {"none", ARM_FP16_FORMAT_NONE},
+ {"ieee", ARM_FP16_FORMAT_IEEE},
+ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
+};
+
+
struct abi_name
{
const char *name;
TLS_LE32
};
+/* The maximum number of insns to be used when loading a constant. */
+inline static int
+arm_constant_limit (bool size_p)
+{
+ return size_p ? 1 : current_tune->constant_limit;
+}
+
/* Emit an insn that's a simple single-set. Both the operands must be known
to be valid. */
inline static rtx
set_optab_libfunc (umod_optab, DImode, NULL);
set_optab_libfunc (smod_optab, SImode, NULL);
set_optab_libfunc (umod_optab, SImode, NULL);
+
+ /* Half-precision float operations. The compiler handles all operations
+ with NULL libfuncs by converting the SFmode. */
+ switch (arm_fp16_format)
+ {
+ case ARM_FP16_FORMAT_IEEE:
+ case ARM_FP16_FORMAT_ALTERNATIVE:
+
+ /* Conversions. */
+ set_conv_libfunc (trunc_optab, HFmode, SFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_f2h_ieee"
+ : "__gnu_f2h_alternative"));
+ set_conv_libfunc (sext_optab, SFmode, HFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_h2f_ieee"
+ : "__gnu_h2f_alternative"));
+
+ /* Arithmetic. */
+ set_optab_libfunc (add_optab, HFmode, NULL);
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
+ set_optab_libfunc (smul_optab, HFmode, NULL);
+ set_optab_libfunc (neg_optab, HFmode, NULL);
+ set_optab_libfunc (sub_optab, HFmode, NULL);
+
+ /* Comparisons. */
+ set_optab_libfunc (eq_optab, HFmode, NULL);
+ set_optab_libfunc (ne_optab, HFmode, NULL);
+ set_optab_libfunc (lt_optab, HFmode, NULL);
+ set_optab_libfunc (le_optab, HFmode, NULL);
+ set_optab_libfunc (ge_optab, HFmode, NULL);
+ set_optab_libfunc (gt_optab, HFmode, NULL);
+ set_optab_libfunc (unord_optab, HFmode, NULL);
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_AAPCS_BASED)
+ synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+}
+
+/* On AAPCS systems, this is the "struct __va_list". */
+static GTY(()) tree va_list_type;
+
+/* Return the type to use as __builtin_va_list. */
+static tree
+arm_build_builtin_va_list (void)
+{
+ tree va_list_name;
+ tree ap_field;
+
+ if (!TARGET_AAPCS_BASED)
+ return std_build_builtin_va_list ();
+
+ /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
+ defined as:
+
+ struct __va_list
+ {
+ void *__ap;
+ };
+
+ The C Library ABI further reinforces this definition in \S
+ 4.1.
+
+ We must follow this definition exactly. The structure tag
+ name is visible in C++ mangled names, and thus forms a part
+ of the ABI. The field name may be used by people who
+ #include <stdarg.h>. */
+ /* Create the type. */
+ va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
+ /* Give it the required name. */
+ va_list_name = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL,
+ get_identifier ("__va_list"),
+ va_list_type);
+ DECL_ARTIFICIAL (va_list_name) = 1;
+ TYPE_NAME (va_list_type) = va_list_name;
+ /* Create the __ap field. */
+ ap_field = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__ap"),
+ ptr_type_node);
+ DECL_ARTIFICIAL (ap_field) = 1;
+ DECL_FIELD_CONTEXT (ap_field) = va_list_type;
+ TYPE_FIELDS (va_list_type) = ap_field;
+ /* Compute its layout. */
+ layout_type (va_list_type);
+
+ return va_list_type;
+}
+
+/* Return an expression of type "void *" pointing to the next
+ available argument in a variable-argument list. VALIST is the
+ user-level va_list object, of type __builtin_va_list. */
+static tree
+arm_extract_valist_ptr (tree valist)
+{
+ if (TREE_TYPE (valist) == error_mark_node)
+ return error_mark_node;
+
+ /* On an AAPCS target, the pointer is stored within "struct
+ va_list". */
+ if (TARGET_AAPCS_BASED)
+ {
+ tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
+ valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
+ valist, ap_field, NULL_TREE);
+ }
+
+ return valist;
+}
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
+static void
+arm_expand_builtin_va_start (tree valist, rtx nextarg)
+{
+ valist = arm_extract_valist_ptr (valist);
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
+static tree
+arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ valist = arm_extract_valist_ptr (valist);
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+}
+
+/* Lookup NAME in SEL. */
+
+static const struct processors *
+arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
+{
+ if (!(name && *name))
+ return NULL;
+
+ for (; sel->name != NULL; sel++)
+ {
+ if (streq (name, sel->name))
+ return sel;
+ }
+
+ error ("bad value (%s) for %s switch", name, desc);
+ return NULL;
}
/* Implement TARGET_HANDLE_OPTION. */
switch (code)
{
case OPT_march_:
- arm_select[1].string = arg;
+ arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
return true;
case OPT_mcpu_:
- arm_select[0].string = arg;
+ arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
return true;
case OPT_mhard_float:
return true;
case OPT_mtune_:
- arm_select[2].string = arg;
+ arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
return true;
default:
}
-/* Fix up any incompatible options that the user has specified.
- This has now turned into a maze. */
-void
-arm_override_options (void)
+/* Fix up any incompatible options that the user has specified. */
+static void
+arm_option_override (void)
{
unsigned i;
- enum processor_type target_arch_cpu = arm_none;
-
- /* Set up the flags based on the cpu/architecture selected by the user. */
- for (i = ARRAY_SIZE (arm_select); i--;)
- {
- struct arm_cpu_select * ptr = arm_select + i;
-
- if (ptr->string != NULL && ptr->string[0] != '\0')
- {
- const struct processors * sel;
-
- for (sel = ptr->processors; sel->name != NULL; sel++)
- if (streq (ptr->string, sel->name))
- {
- /* Set the architecture define. */
- if (i != ARM_OPT_SET_TUNE)
- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
-
- /* Determine the processor core for which we should
- tune code-generation. */
- if (/* -mcpu= is a sensible default. */
- i == ARM_OPT_SET_CPU
- /* -mtune= overrides -mcpu= and -march=. */
- || i == ARM_OPT_SET_TUNE)
- arm_tune = (enum processor_type) (sel - ptr->processors);
-
- /* Remember the CPU associated with this architecture.
- If no other option is used to set the CPU type,
- we'll use this to guess the most suitable tuning
- options. */
- if (i == ARM_OPT_SET_ARCH)
- target_arch_cpu = sel->core;
-
- if (i != ARM_OPT_SET_TUNE)
- {
- /* If we have been given an architecture and a processor
- make sure that they are compatible. We only generate
- a warning though, and we prefer the CPU over the
- architecture. */
- if (insn_flags != 0 && (insn_flags ^ sel->flags))
- warning (0, "switch -mcpu=%s conflicts with -march= switch",
- ptr->string);
-
- insn_flags = sel->flags;
- }
- break;
- }
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
- if (sel->name == NULL)
- error ("bad value (%s) for %s switch", ptr->string, ptr->name);
- }
+ if (arm_selected_arch)
+ {
+ if (arm_selected_cpu)
+ {
+ /* Check for conflict between mcpu and march. */
+ if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
+ {
+ warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
+ arm_selected_cpu->name, arm_selected_arch->name);
+ /* -march wins for code generation.
+ -mcpu wins for default tuning. */
+ if (!arm_selected_tune)
+ arm_selected_tune = arm_selected_cpu;
+
+ arm_selected_cpu = arm_selected_arch;
+ }
+ else
+ /* -mcpu wins. */
+ arm_selected_arch = NULL;
+ }
+ else
+ /* Pick a CPU based on the architecture. */
+ arm_selected_cpu = arm_selected_arch;
}
- /* Guess the tuning options from the architecture if necessary. */
- if (arm_tune == arm_none)
- arm_tune = target_arch_cpu;
-
/* If the user did not specify a processor, choose one for them. */
- if (insn_flags == 0)
+ if (!arm_selected_cpu)
{
const struct processors * sel;
unsigned int sought;
- enum processor_type cpu;
- cpu = TARGET_CPU_DEFAULT;
- if (cpu == arm_none)
+ arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
+ if (!arm_selected_cpu->name)
{
#ifdef SUBTARGET_CPU_DEFAULT
/* Use the subtarget default CPU if none was specified by
configure. */
- cpu = SUBTARGET_CPU_DEFAULT;
+ arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
/* Default to ARM6. */
- if (cpu == arm_none)
- cpu = arm6;
+ if (!arm_selected_cpu->name)
+ arm_selected_cpu = &all_cores[arm6];
}
- sel = &all_cores[cpu];
+ sel = arm_selected_cpu;
insn_flags = sel->flags;
/* Now check to see if the user has specified some command line
sel = best_fit;
}
- insn_flags = sel->flags;
+ arm_selected_cpu = sel;
}
- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
- arm_default_cpu = (enum processor_type) (sel - all_cores);
- if (arm_tune == arm_none)
- arm_tune = arm_default_cpu;
}
- /* The processor for which we should tune should now have been
- chosen. */
- gcc_assert (arm_tune != arm_none);
+ gcc_assert (arm_selected_cpu);
+ /* The selected cpu may be an architecture, so lookup tuning by core ID. */
+ if (!arm_selected_tune)
+ arm_selected_tune = &all_cores[arm_selected_cpu->core];
- tune_flags = all_cores[(int)arm_tune].flags;
- if (optimize_size)
- targetm.rtx_costs = arm_size_rtx_costs;
+ sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
+ insn_flags = arm_selected_cpu->flags;
+
+ arm_tune = arm_selected_tune->core;
+ tune_flags = arm_selected_tune->flags;
+ current_tune = arm_selected_tune->tune;
+
+ if (target_fp16_format_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
+ {
+ if (streq (all_fp16_formats[i].name, target_fp16_format_name))
+ {
+ arm_fp16_format = all_fp16_formats[i].fp16_format_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (all_fp16_formats))
+ error ("invalid __fp16 format option: -mfp16-format=%s",
+ target_fp16_format_name);
+ }
+ else
+ arm_fp16_format = ARM_FP16_FORMAT_NONE;
+
+ if (target_abi_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
+ {
+ if (streq (arm_all_abis[i].name, target_abi_name))
+ {
+ arm_abi = arm_all_abis[i].abi_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (arm_all_abis))
+ error ("invalid ABI option: -mabi=%s", target_abi_name);
+ }
else
- targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
+ arm_abi = ARM_DEFAULT_ABI;
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
if (TARGET_ARM && !(insn_flags & FL_NOTM))
error ("target CPU does not support ARM mode");
- if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
+ /* BPABI targets use linker tricks to allow interworking on cores
+ without thumb support. */
+ if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
{
warning (0, "target CPU does not support interworking" );
target_flags &= ~MASK_INTERWORK;
/* Callee super interworking implies thumb interworking. Adding
this to the flags here simplifies the logic elsewhere. */
if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
- target_flags |= MASK_INTERWORK;
+ target_flags |= MASK_INTERWORK;
/* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
from here where no function is being compiled currently. */
if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
- if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
- warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
-
if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
{
warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7 = (insn_flags & FL_ARCH7) != 0;
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
- thumb_code = (TARGET_ARM == 0);
+ thumb_code = TARGET_ARM == 0;
+ thumb1_code = TARGET_THUMB1 != 0;
arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
+ arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
+
+ /* If we are not using the default (ARM mode) section anchor offset
+ ranges, then set the correct ranges now. */
+ if (TARGET_THUMB1)
+ {
+ /* Thumb-1 LDR instructions cannot have negative offsets.
+ Permissible positive offset ranges are 5-bit (for byte loads),
+ 6-bit (for halfword loads), or 7-bit (for word loads).
+ Empirical results suggest a 7-bit anchor range gives the best
+ overall code size. */
+ targetm.min_anchor_offset = 0;
+ targetm.max_anchor_offset = 127;
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* The minimum is set such that the total size of the block
+ for a particular anchor is 248 + 1 + 4095 bytes, which is
+ divisible by eight, ensuring natural spacing of anchors. */
+ targetm.min_anchor_offset = -248;
+ targetm.max_anchor_offset = 4095;
+ }
/* V5 code we generate is completely interworking capable, so we turn off
TARGET_INTERWORK here to avoid many tests later on. */
if (arm_arch5)
target_flags &= ~MASK_INTERWORK;
- if (target_abi_name)
- {
- for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
- {
- if (streq (arm_all_abis[i].name, target_abi_name))
- {
- arm_abi = arm_all_abis[i].abi_type;
- break;
- }
- }
- if (i == ARRAY_SIZE (arm_all_abis))
- error ("invalid ABI option: -mabi=%s", target_abi_name);
- }
- else
- arm_abi = ARM_DEFAULT_ABI;
-
if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
error ("iwmmxt abi requires an iwmmxt capable cpu");
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
if (target_fpu_name == NULL && target_fpe_name != NULL)
{
if (streq (target_fpe_name, "2"))
error ("invalid floating point emulation option: -mfpe=%s",
target_fpe_name);
}
- if (target_fpu_name != NULL)
- {
- /* The user specified a FPU. */
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
- {
- if (streq (all_fpus[i].name, target_fpu_name))
- {
- arm_fpu_arch = all_fpus[i].fpu;
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- break;
- }
- }
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
- }
- else
+
+ if (target_fpu_name == NULL)
{
#ifdef FPUTYPE_DEFAULT
- /* Use the default if it is specified for this platform. */
- arm_fpu_arch = FPUTYPE_DEFAULT;
- arm_fpu_tune = FPUTYPE_DEFAULT;
+ target_fpu_name = FPUTYPE_DEFAULT;
#else
- /* Pick one based on CPU type. */
- /* ??? Some targets assume FPA is the default.
- if ((insn_flags & FL_VFP) != 0)
- arm_fpu_arch = FPUTYPE_VFP;
- else
- */
if (arm_arch_cirrus)
- arm_fpu_arch = FPUTYPE_MAVERICK;
+ target_fpu_name = "maverick";
else
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
+ target_fpu_name = "fpe2";
#endif
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
- arm_fpu_tune = FPUTYPE_FPA;
- else
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
}
- if (target_float_abi_name != NULL)
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
{
- /* The user specified a FP ABI. */
- for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+
+ if (!arm_fpu_desc)
+ {
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+ return;
+ }
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
+ else
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable();
+ }
+
+ if (target_float_abi_name != NULL)
+ {
+ /* The user specified a FP ABI. */
+ for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
{
if (streq (all_float_abis[i].name, target_float_abi_name))
{
else
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
- sorry ("-mfloat-abi=hard and VFP");
+ if (TARGET_AAPCS_BASED
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
+ error ("FPA is unsupported in the AAPCS");
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (TARGET_CALLER_INTERWORKING)
+ error ("AAPCS does not support -mcaller-super-interworking");
+ else
+ if (TARGET_CALLEE_INTERWORKING)
+ error ("AAPCS does not support -mcallee-super-interworking");
+ }
/* FPA and iWMMXt are incompatible because the insn encodings overlap.
VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
if (TARGET_THUMB2 && TARGET_IWMMXT)
sorry ("Thumb-2 iWMMXt");
+ /* __fp16 support currently assumes the core has ldrh. */
+ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
+ sorry ("__fp16 and no ldrh");
+
/* If soft-float is specified then don't use FPU. */
if (TARGET_SOFT_FLOAT)
- arm_fpu_arch = FPUTYPE_NONE;
+ arm_fpu_attr = FPU_NONE;
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (arm_abi == ARM_ABI_IWMMXT)
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD
+ && TARGET_HARD_FLOAT
+ && TARGET_VFP)
+ arm_pcs_default = ARM_PCS_AAPCS_VFP;
+ else
+ arm_pcs_default = ARM_PCS_AAPCS;
+ }
+ else
+ {
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
+ sorry ("-mfloat-abi=hard and VFP");
+
+ if (arm_abi == ARM_ABI_APCS)
+ arm_pcs_default = ARM_PCS_APCS;
+ else
+ arm_pcs_default = ARM_PCS_ATPCS;
+ }
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+ || (TARGET_FPA && arm_fpu_desc->rev))
&& (tune_flags & FL_MODE32) == 0)
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
/* Use the cp15 method if it is available. */
if (target_thread_pointer == TP_AUTO)
{
- if (arm_arch6k && !TARGET_THUMB)
+ if (arm_arch6k && !TARGET_THUMB1)
target_thread_pointer = TP_CP15;
else
target_thread_pointer = TP_SOFT;
arm_pic_register = pic_register;
}
- /* ??? We might want scheduling for thumb2. */
- if (TARGET_THUMB && flag_schedule_insns)
+ /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
+ if (fix_cm3_ldrd == 2)
+ {
+ if (arm_selected_cpu->core == cortexm3)
+ fix_cm3_ldrd = 1;
+ else
+ fix_cm3_ldrd = 0;
+ }
+
+ if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
flag_schedule_insns = 0;
if (optimize_size)
{
- arm_constant_limit = 1;
-
/* If optimizing for size, bump the number of instructions that we
are prepared to conditionally execute (even on a StrongARM). */
max_insns_skipped = 6;
}
else
{
- /* For processors with load scheduling, it never costs more than
- 2 cycles to load a constant, and the load scheduler may well
- reduce that to 1. */
- if (arm_ld_sched)
- arm_constant_limit = 1;
-
- /* On XScale the longer latency of a load makes it more difficult
- to achieve a good schedule, so it's faster to synthesize
- constants that can be done in two insns. */
- if (arm_tune_xscale)
- arm_constant_limit = 2;
-
/* StrongARM has early execution of branches, so a sequence
that is worth skipping is shorter. */
if (arm_tune_strongarm)
max_insns_skipped = 3;
}
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
+ if (flag_pic)
+ /* Hoisting PIC address calculations more aggressively provides a small,
+ but measurable, size reduction for PIC code. Therefore, we decrease
+ the bar for unrestricted expression hoisting to the cost of PIC address
+ calculation, which is 2 instructions. */
+ maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2);
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
if (optimize > 0
&& (TREE_NOTHROW (current_function_decl)
|| !(flag_unwind_tables
- || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
+ || (flag_exceptions && arm_except_unwind_info () != UI_SJLJ)))
&& TREE_THIS_VOLATILE (current_function_decl))
type |= ARM_FT_VOLATILE;
}
\f
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ On the ARM, (if r8 is the static chain regnum, and remembering that
+ referencing pc adds an offset of 8) the trampoline looks like:
+ ldr r8, [pc, #0]
+ ldr pc, [pc]
+ .word static chain value
+ .word function's address
+ XXX FIXME: When the trampoline returns, r8 will be clobbered. */
+
+static void
+arm_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_ARM)
+ {
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* The Thumb-2 trampoline is similar to the arm implementation.
+ Unlike 16-bit Thumb, we enter the stub in thumb mode. */
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
+ STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else
+ {
+ ASM_OUTPUT_ALIGN (f, 2);
+ fprintf (f, "\t.code\t16\n");
+ fprintf (f, ".Ltrampoline_start:\n");
+ asm_fprintf (f, "\tpush\t{r0, r1}\n");
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
+ asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
+ }
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline. */
+
+static void
+arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr, mem, a_tramp;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ emit_move_insn (mem, fnaddr);
+
+ a_tramp = XEXP (m_tramp, 0);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+ LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
+ plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
+}
+
+/* Thumb trampolines should be entered in thumb mode, so set
+ the bottom bit of the address. */
+
+static rtx
+arm_trampoline_adjust_address (rtx addr)
+{
+ if (TARGET_THUMB)
+ addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
+ NULL, 0, OPTAB_LIB_WIDEN);
+ return addr;
+}
+\f
/* Return 1 if it is possible to return using a single instruction.
If SIBLING is non-null, this is a test for a return before a sibling
call. SIBLING is the call insn, so we can examine its register usage. */
switch (code)
{
case PLUS:
+ case COMPARE:
+ case EQ:
+ case NE:
+ case GT:
+ case LE:
+ case LT:
+ case GE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
case MINUS: /* Should only occur with (MINUS I reg) => rsb */
case XOR:
+ return 0;
+
case IOR:
+ if (TARGET_THUMB2)
+ return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
return 0;
case AND:
&& !cond
&& (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1, 0)
- > arm_constant_limit + (code != SET)))
+ > (arm_constant_limit (optimize_function_for_size_p (cfun))
+ + (code != SET))))
{
if (code == SET)
{
/* Currently SET is the only monadic value for CODE, all
the rest are diadic. */
- emit_set_insn (target, GEN_INT (val));
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (target, GEN_INT (val));
+ else
+ emit_set_insn (target, GEN_INT (val));
+
return 1;
}
else
{
rtx temp = subtargets ? gen_reg_rtx (mode) : target;
- emit_set_insn (temp, GEN_INT (val));
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (temp, GEN_INT (val));
+ else
+ emit_set_insn (temp, GEN_INT (val));
+
/* For MINUS, the value is subtracted from, since we never
have subtraction of a constant. */
if (code == MINUS)
1);
}
-/* Return the number of ARM instructions required to synthesize the given
- constant. */
+/* Return the number of instructions required to synthesize the given
+ constant, if we start emitting them from bit-position I. */
static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
HOST_WIDE_INT temp1;
+ int step_size = TARGET_ARM ? 2 : 1;
int num_insns = 0;
+
+ gcc_assert (TARGET_ARM || i == 0);
+
do
{
int end;
if (i <= 0)
i += 32;
- if (remainder & (3 << (i - 2)))
+ if (remainder & (((1 << step_size) - 1) << (i - step_size)))
{
end = i - 8;
if (end < 0)
| ((i < end) ? (0xff >> (32 - end)) : 0));
remainder &= ~temp1;
num_insns++;
- i -= 6;
+ i -= 8 - step_size;
}
- i -= 2;
+ i -= step_size;
} while (remainder);
return num_insns;
}
+static int
+find_best_start (unsigned HOST_WIDE_INT remainder)
+{
+ int best_consecutive_zeros = 0;
+ int i;
+ int best_start = 0;
+
+ /* If we aren't targetting ARM, the best place to start is always at
+ the bottom. */
+ if (! TARGET_ARM)
+ return 0;
+
+ for (i = 0; i < 32; i += 2)
+ {
+ int consecutive_zeros = 0;
+
+ if (!(remainder & (3 << i)))
+ {
+ while ((i < 32) && !(remainder & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
+ }
+
+ /* So long as it won't require any more insns to do so, it's
+ desirable to emit a small constant (in bits 0...9) in the last
+ insn. This way there is more chance that it can be combined with
+ a later addressing insn to form a pre-indexed load or store
+ operation. Consider:
+
+ *((volatile int *)0xe0000100) = 1;
+ *((volatile int *)0xe0000110) = 2;
+
+ We want this to wind up as:
+
+ mov rA, #0xe0000000
+ mov rB, #1
+ str rB, [rA, #0x100]
+ mov rB, #2
+ str rB, [rA, #0x110]
+
+ rather than having to synthesize both large constants from scratch.
+
+ Therefore, we calculate how many insns would be required to emit
+ the constant starting from `best_start', and also starting from
+ zero (i.e. with bit 31 first to be output). If `best_start' doesn't
+ yield a shorter sequence, we may as well use zero. */
+ if (best_start != 0
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+ && (count_insns_for_constant (remainder, 0) <=
+ count_insns_for_constant (remainder, best_start)))
+ best_start = 0;
+
+ return best_start;
+}
+
/* Emit an instruction with the indicated PATTERN. If COND is
non-NULL, conditionalize the execution of the instruction on COND
being true. */
{
int can_invert = 0;
int can_negate = 0;
+ int final_invert = 0;
int can_negate_initial = 0;
- int can_shift = 0;
int i;
int num_bits_set = 0;
int set_sign_bit_copies = 0;
int insns = 0;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+ int step_size = TARGET_ARM ? 2 : 1;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
{
case SET:
can_invert = 1;
- can_shift = 1;
can_negate = 1;
break;
GEN_INT (ARM_SIGN_EXTEND (val))));
return 1;
}
+
if (remainder == 0)
{
if (reload_completed && rtx_equal_p (target, source))
return 0;
+
if (generate)
emit_constant_insn (cond,
gen_rtx_SET (VOIDmode, target, source));
return 1;
}
+
+ if (TARGET_THUMB2)
+ can_invert = 1;
break;
case AND:
return 1;
}
- /* We don't know how to handle other cases yet. */
- gcc_assert (remainder == 0xffffffff);
-
- if (generate)
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, target,
- gen_rtx_NOT (mode, source)));
- return 1;
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ return 1;
+ }
+ break;
case MINUS:
/* We treat MINUS as (val - source), since (source - val) is always
/* Calculate a few attributes that may be useful for specific
optimizations. */
+ /* Count number of leading zeros. */
for (i = 31; i >= 0; i--)
{
if ((remainder & (1 << i)) == 0)
break;
}
+ /* Count number of leading 1's. */
for (i = 31; i >= 0; i--)
{
if ((remainder & (1 << i)) != 0)
break;
}
+ /* Count number of trailing zero's. */
for (i = 0; i <= 31; i++)
{
if ((remainder & (1 << i)) == 0)
break;
}
+ /* Count number of trailing 1's. */
for (i = 0; i <= 31; i++)
{
if ((remainder & (1 << i)) != 0)
if (code == XOR)
break;
+ /* Convert.
+ x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
+ and the remainder 0s for e.g. 0xfff00000)
+ x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
+
+ This can be done in 2 instructions by using shifts with mov or mvn.
+ e.g. for
+ x = x | 0xfff00000;
+ we generate.
+ mvn r0, r0, asl #12
+ mvn r0, r0, lsr #12 */
if (set_sign_bit_copies > 8
&& (val & (-1 << (32 - set_sign_bit_copies))) == val)
{
return 2;
}
+ /* Convert
+ x = y | constant (which has set_zero_bit_copies number of trailing ones).
+ to
+ x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
+
+ For eg. r0 = r0 | 0xfff
+ mvn r0, r0, lsr #12
+ mvn r0, r0, asl #12
+
+ */
if (set_zero_bit_copies > 8
&& (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
{
return 2;
}
+ /* This will never be reached for Thumb2 because orn is a valid
+ instruction. This is for Thumb1 and the ARM 32 bit cases.
+
+ x = y | constant (such that ~constant is a valid constant)
+ Transform this to
+ x = ~(~y & ~constant).
+ */
if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
{
if (generate)
if (remainder & (1 << i))
num_bits_set++;
- if (code == AND || (can_invert && num_bits_set > 16))
- remainder = (~remainder) & 0xffffffff;
+ if ((code == AND)
+ || (code != IOR && can_invert && num_bits_set > 16))
+ remainder ^= 0xffffffff;
else if (code == PLUS && num_bits_set > 16)
remainder = (-remainder) & 0xffffffff;
+
+ /* For XOR, if more than half the bits are set and there's a sequence
+ of more than 8 consecutive ones in the pattern then we can XOR by the
+ inverted constant and then invert the final result; this may save an
+ instruction and might also lead to the final mvn being merged with
+ some other operation. */
+ else if (code == XOR && num_bits_set > 16
+ && (count_insns_for_constant (remainder ^ 0xffffffff,
+ find_best_start
+ (remainder ^ 0xffffffff))
+ < count_insns_for_constant (remainder,
+ find_best_start (remainder))))
+ {
+ remainder ^= 0xffffffff;
+ final_invert = 1;
+ }
else
{
can_invert = 0;
/* ??? Use thumb2 replicated constants when the high and low halfwords are
the same. */
{
- int best_start = 0;
- if (!TARGET_THUMB2)
- {
- int best_consecutive_zeros = 0;
-
- for (i = 0; i < 32; i += 2)
- {
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
- {
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
- }
- }
-
- /* So long as it won't require any more insns to do so, it's
- desirable to emit a small constant (in bits 0...9) in the last
- insn. This way there is more chance that it can be combined with
- a later addressing insn to form a pre-indexed load or store
- operation. Consider:
-
- *((volatile int *)0xe0000100) = 1;
- *((volatile int *)0xe0000110) = 2;
-
- We want this to wind up as:
-
- mov rA, #0xe0000000
- mov rB, #1
- str rB, [rA, #0x100]
- mov rB, #2
- str rB, [rA, #0x110]
-
- rather than having to synthesize both large constants from scratch.
-
- Therefore, we calculate how many insns would be required to emit
- the constant starting from `best_start', and also starting from
- zero (i.e. with bit 31 first to be output). If `best_start' doesn't
- yield a shorter sequence, we may as well use zero. */
- if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
- }
-
/* Now start emitting the insns. */
- i = best_start;
+ i = find_best_start (remainder);
do
{
int end;
}
else
{
- if (remainder && subtargets)
+ if ((final_invert || remainder) && subtargets)
new_src = gen_reg_rtx (mode);
else
new_src = target;
code = PLUS;
insns++;
- if (TARGET_ARM)
- i -= 6;
- else
- i -= 7;
+ i -= 8 - step_size;
}
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
shifts. */
- if (TARGET_ARM)
- i -= 2;
- else
- i--;
+ i -= step_size;
}
while (remainder);
}
+ if (final_invert)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ insns++;
+ }
+
return insns;
}
immediate value easier to load. */
enum rtx_code
-arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
- rtx * op1)
+arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
- unsigned HOST_WIDE_INT i = INTVAL (*op1);
- unsigned HOST_WIDE_INT maxval;
+ enum machine_mode mode;
+ unsigned HOST_WIDE_INT i, maxval;
+
+ mode = GET_MODE (*op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (*op1);
+
maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
+ /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
+ we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
+ reversed or (for constant OP1) adjusted to GE/LT. Similarly
+ for GTU/LEU in Thumb mode. */
+ if (mode == DImode)
+ {
+ rtx tem;
+
+ /* To keep things simple, always use the Cirrus cfcmp64 if it is
+ available. */
+ if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ return code;
+
+ if (code == GT || code == LE
+ || (!TARGET_ARM && (code == GTU || code == LEU)))
+ {
+ /* Missing comparison. First try to use an available
+ comparison. */
+ if (GET_CODE (*op1) == CONST_INT)
+ {
+ i = INTVAL (*op1);
+ switch (code)
+ {
+ case GT:
+ case LE:
+ if (i != maxval
+ && arm_const_double_by_immediates (GEN_INT (i + 1)))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GT ? GE : LT;
+ }
+ break;
+ case GTU:
+ case LEU:
+ if (i != ~((unsigned HOST_WIDE_INT) 0)
+ && arm_const_double_by_immediates (GEN_INT (i + 1)))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GTU ? GEU : LTU;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* If that did not work, reverse the condition. */
+ tem = *op0;
+ *op0 = *op1;
+ *op1 = tem;
+ return swap_condition (code);
+ }
+
+ return code;
+ }
+
+ /* Comparisons smaller than DImode. Only adjust comparisons against
+ an out-of-range constant. */
+ if (GET_CODE (*op1) != CONST_INT
+ || const_ok_for_arm (INTVAL (*op1))
+ || const_ok_for_arm (- INTVAL (*op1)))
+ return code;
+
+ i = INTVAL (*op1);
+
switch (code)
{
case EQ:
/* Define how to find the value returned by a function. */
-rtx
-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
+static rtx
+arm_function_value(const_tree type, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
{
enum machine_mode mode;
int unsignedp ATTRIBUTE_UNUSED;
rtx r ATTRIBUTE_UNUSED;
mode = TYPE_MODE (type);
+
+ if (TARGET_AAPCS_BASED)
+ return aapcs_allocate_return_reg (mode, type, func);
+
/* Promote integer types. */
if (INTEGRAL_TYPE_P (type))
- PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
+ mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
/* Promotes small structs returned in a register to full-word size
for big-endian AAPCS. */
}
}
- return LIBCALL_VALUE(mode);
+ return LIBCALL_VALUE (mode);
+}
+
+static int
+libcall_eq (const void *p1, const void *p2)
+{
+ return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
+}
+
+static hashval_t
+libcall_hash (const void *p1)
+{
+ return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
+}
+
+static void
+add_libcall (htab_t htab, rtx libcall)
+{
+ *htab_find_slot (htab, libcall, INSERT) = libcall;
+}
+
+static bool
+arm_libcall_uses_aapcs_base (const_rtx libcall)
+{
+ static bool init_done = false;
+ static htab_t libcall_htab;
+
+ if (!init_done)
+ {
+ init_done = true;
+
+ libcall_htab = htab_create (31, libcall_hash, libcall_eq,
+ NULL);
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sext_optab, SFmode, HFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, SFmode));
+ }
+
+ return libcall && htab_find (libcall_htab, libcall) != NULL;
+}
+
+rtx
+arm_libcall_value (enum machine_mode mode, const_rtx libcall)
+{
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ /* The following libcalls return their result in integer registers,
+ even though they return a floating point value. */
+ if (arm_libcall_uses_aapcs_base (libcall))
+ return gen_rtx_REG (mode, ARG_REGISTER(1));
+
+ }
+
+ return LIBCALL_VALUE (mode);
}
/* Determine the amount of memory needed to store the possible return
{
int size = 16;
- if (TARGET_ARM)
+ if (TARGET_32BIT)
{
if (TARGET_HARD_FLOAT_ABI)
{
+ if (TARGET_VFP)
+ size += 32;
if (TARGET_FPA)
size += 12;
if (TARGET_MAVERICK)
return size;
}
-/* Decide whether a type should be returned in memory (true)
- or in a register (false). This is called by the macro
- TARGET_RETURN_IN_MEMORY. */
-bool
-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+/* Decide whether TYPE should be returned in memory (true)
+ or in a register (false). FNTYPE is the type of the function making
+ the call. */
+static bool
+arm_return_in_memory (const_tree type, const_tree fntype)
{
HOST_WIDE_INT size;
- size = int_size_in_bytes (type);
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Simple, non-aggregate types (ie not including vectors and
+ complex) are always returned in a register (or registers).
+ We don't care about which register here, so we can short-cut
+ some of the detail. */
+ if (!AGGREGATE_TYPE_P (type)
+ && TREE_CODE (type) != VECTOR_TYPE
+ && TREE_CODE (type) != COMPLEX_TYPE)
+ return false;
+
+ /* Any return value that is no larger than one word can be
+ returned in r0. */
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
+ return false;
+
+ /* Check any available co-processors to see if they accept the
+ type as a register candidate (VFP, for example, can return
+ some aggregates in consecutive registers). These aren't
+ available if the call is variadic. */
+ if (aapcs_select_return_coproc (type, fntype) >= 0)
+ return false;
+
+ /* Vector values should be returned using ARM registers, not
+ memory (unless they're over 16 bytes, which will break since
+ we only have four call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
+ /* The rest go in memory. */
+ return true;
+ }
- /* Vector values should be returned using ARM registers, not memory (unless
- they're over 16 bytes, which will break since we only have four
- call-clobbered registers to play with). */
if (TREE_CODE (type) == VECTOR_TYPE)
return (size < 0 || size > (4 * UNITS_PER_WORD));
if (!AGGREGATE_TYPE_P (type) &&
- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
- /* All simple types are returned in registers.
- For AAPCS, complex types are treated the same as aggregates. */
- return 0;
+ (TREE_CODE (type) != VECTOR_TYPE))
+ /* All simple types are returned in registers. */
+ return false;
if (arm_abi != ARM_ABI_APCS)
{
the aggregate is either huge or of variable size, and in either case
we will want to return it via memory and not in a register. */
if (size < 0 || size > UNITS_PER_WORD)
- return 1;
+ return true;
if (TREE_CODE (type) == RECORD_TYPE)
{
have been created by C++. */
for (field = TYPE_FIELDS (type);
field && TREE_CODE (field) != FIELD_DECL;
- field = TREE_CHAIN (field))
+ field = DECL_CHAIN (field))
continue;
if (field == NULL)
- return 0; /* An empty structure. Allowed by an extension to ANSI C. */
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */
/* Check that the first field is valid for returning in a register. */
/* ... Floats are not allowed */
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
/* ... Aggregates that are not themselves valid for returning in
a register are not allowed. */
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
/* Now check the remaining fields, if any. Only bitfields are allowed,
since they are not addressable. */
- for (field = TREE_CHAIN (field);
+ for (field = DECL_CHAIN (field);
field;
- field = TREE_CHAIN (field))
+ field = DECL_CHAIN (field))
{
if (TREE_CODE (field) != FIELD_DECL)
continue;
if (!DECL_BIT_FIELD_TYPE (field))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
if (TREE_CODE (type) == UNION_TYPE)
integral, or can be returned in an integer register. */
for (field = TYPE_FIELDS (type);
field;
- field = TREE_CHAIN (field))
+ field = DECL_CHAIN (field))
{
if (TREE_CODE (field) != FIELD_DECL)
continue;
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
#endif /* not ARM_WINCE */
/* Return all other types in memory. */
- return 1;
+ return true;
}
/* Indicate whether or not words of a double are in big-endian order. */
return 1;
}
-/* Initialize a variable CUM of type CUMULATIVE_ARGS
- for a call to a function whose data type is FNTYPE.
- For a library call, FNTYPE is NULL. */
-void
-arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
- rtx libname ATTRIBUTE_UNUSED,
- tree fndecl ATTRIBUTE_UNUSED)
+const struct pcs_attribute_arg
{
- /* On the ARM, the offset starts at 0. */
- pcum->nregs = 0;
- pcum->iwmmxt_nregs = 0;
- pcum->can_split = true;
+ const char *arg;
+ enum arm_pcs value;
+} pcs_attribute_args[] =
+ {
+ {"aapcs", ARM_PCS_AAPCS},
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
+#if 0
+ /* We could recognize these, but changes would be needed elsewhere
+ * to implement them. */
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
+ {"atpcs", ARM_PCS_ATPCS},
+ {"apcs", ARM_PCS_APCS},
+#endif
+ {NULL, ARM_PCS_UNKNOWN}
+ };
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- pcum->named_count = 0;
- pcum->nargs = 0;
+static enum arm_pcs
+arm_pcs_from_attribute (tree attr)
+{
+ const struct pcs_attribute_arg *ptr;
+ const char *arg;
- if (TARGET_REALLY_IWMMXT && fntype)
+ /* Get the value of the argument. */
+ if (TREE_VALUE (attr) == NULL_TREE
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
+ return ARM_PCS_UNKNOWN;
+
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
+
+ /* Check it against the list of known arguments. */
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
+ if (streq (arg, ptr->arg))
+ return ptr->value;
+
+ /* An unrecognized interrupt type. */
+ return ARM_PCS_UNKNOWN;
+}
+
+/* Get the PCS variant to use for this call. TYPE is the function's type
+ specification, DECL is the specific declartion. DECL may be null if
+ the call could be indirect or if this is a library call. */
+static enum arm_pcs
+arm_get_pcs_model (const_tree type, const_tree decl)
+{
+ bool user_convention = false;
+ enum arm_pcs user_pcs = arm_pcs_default;
+ tree attr;
+
+ gcc_assert (type);
+
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
+ if (attr)
{
- tree fn_arg;
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
+ user_convention = true;
+ }
- for (fn_arg = TYPE_ARG_TYPES (fntype);
- fn_arg;
- fn_arg = TREE_CHAIN (fn_arg))
- pcum->named_count += 1;
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Detect varargs functions. These always use the base rules
+ (no argument is ever a candidate for a co-processor
+ register). */
+ bool base_rules = stdarg_p (type);
+
+ if (user_convention)
+ {
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
+ sorry ("Non-AAPCS derived PCS variant");
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
+ error ("Variadic functions must use the base AAPCS variant");
+ }
- if (! pcum->named_count)
- pcum->named_count = INT_MAX;
+ if (base_rules)
+ return ARM_PCS_AAPCS;
+ else if (user_convention)
+ return user_pcs;
+ else if (decl && flag_unit_at_a_time)
+ {
+ /* Local functions never leak outside this compilation unit,
+ so we are free to use whatever conventions are
+ appropriate. */
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ return ARM_PCS_AAPCS_LOCAL;
+ }
}
+ else if (user_convention && user_pcs != arm_pcs_default)
+ sorry ("PCS variant");
+
+ /* For everything else we use the target's default. */
+ return arm_pcs_default;
}
-/* Return true if mode/type need doubleword alignment. */
-bool
-arm_needs_doubleword_align (enum machine_mode mode, tree type)
+static void
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ rtx libcall ATTRIBUTE_UNUSED,
+ const_tree fndecl ATTRIBUTE_UNUSED)
{
- return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
- || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
+ /* Record the unallocated VFP registers. */
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
+ pcum->aapcs_vfp_reg_alloc = 0;
}
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ type. If a non-floating point type is found, or if a floating point
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
+ otherwise return the count in the sub-tree. */
+static int
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
+
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (mode != DFmode && mode != SFmode)
+ return -1;
-/* Determine where to put an argument to a function.
- Value is zero to push the argument on the stack,
- or a hard register in which to store the argument.
+ if (*modep == VOIDmode)
+ *modep = mode;
- MODE is the argument's machine mode.
- TYPE is the data type of the argument (as a tree).
- This is null for libcalls where that information may
- not be available.
- CUM is a variable of type CUMULATIVE_ARGS which gives info about
- the preceding args and about the function being called.
- NAMED is nonzero if this argument is a named parameter
- (otherwise it is an extra parameter matching an ellipsis). */
+ if (*modep == mode)
+ return 1;
-rtx
-arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, int named)
-{
- int nregs;
+ break;
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (mode)
- && pcum->named_count > pcum->nargs + 1)
- {
- if (pcum->iwmmxt_nregs <= 9)
- return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
- else
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ /* Use V2SImode and V4SImode as representatives of all 64-bit
+ and 128-bit vector types, whether or not those modes are
+ supported with the present options. */
+ size = int_size_in_bytes (type);
+ switch (size)
{
- pcum->can_split = false;
- return NULL_RTX;
+ case 8:
+ mode = V2SImode;
+ break;
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
}
- }
- /* Put doubleword aligned quantities in even register pairs. */
- if (pcum->nregs & 1
- && ARM_DOUBLEWORD_ALIGN
- && arm_needs_doubleword_align (mode, type))
- pcum->nregs++;
+ if (*modep == VOIDmode)
+ *modep = mode;
- if (mode == VOIDmode)
- /* Pick an arbitrary value for operand 2 of the call insn. */
- return const0_rtx;
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
- /* Only allow splitting an arg between regs and memory if all preceding
- args were allocated to regs. For args passed by reference we only count
- the reference pointer. */
- if (pcum->can_split)
- nregs = 1;
- else
- nregs = ARM_NUM_REGS2 (mode, type);
+ break;
- if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
- return NULL_RTX;
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
- return gen_rtx_REG (mode, pcum->nregs);
-}
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
-static int
-arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, bool named ATTRIBUTE_UNUSED)
-{
- int nregs = pcum->nregs;
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
- if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
- return 0;
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
- if (NUM_ARG_REGS > nregs
- && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
- && pcum->can_split)
- return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
- return 0;
-}
+ return count;
+ }
-/* Variable sized types are passed by reference. This is a GCC
- extension to the ARM ABI. */
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
-static bool
-arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- const_tree type, bool named ATTRIBUTE_UNUSED)
-{
- return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
-}
-\f
-/* Encode the current state of the #pragma [no_]long_calls. */
-typedef enum
-{
- OFF, /* No #pragma [no_]long_calls is in effect. */
- LONG, /* #pragma long_calls is in effect. */
- SHORT /* #pragma no_long_calls is in effect. */
-} arm_pragma_enum;
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
-static arm_pragma_enum arm_pragma_long_calls = OFF;
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
-void
-arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-{
- arm_pragma_long_calls = LONG;
-}
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
-void
-arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-{
- arm_pragma_long_calls = SHORT;
-}
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
-void
-arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-{
- arm_pragma_long_calls = OFF;
-}
-\f
-/* Table of machine attributes. */
-const struct attribute_spec arm_attribute_table[] =
-{
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
- /* Function calls made to this symbol must be done indirectly, because
- it may lie outside of the 26 bit addressing range of a normal function
- call. */
- { "long_call", 0, 0, false, true, true, NULL },
- /* Whereas these functions are always known to reside within the 26 bit
- addressing range. */
- { "short_call", 0, 0, false, true, true, NULL },
- /* Interrupt Service Routines have special prologue and epilogue requirements. */
- { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
- { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
- { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
-#ifdef ARM_PE
- /* ARM/PE has three new attributes:
- interfacearm - ?
- dllexport - for exporting a function/variable that will live in a dll
- dllimport - for importing a function/variable from a dll
+ return count;
+ }
- Microsoft allows multiple declspecs in one __declspec, separating
- them with spaces. We do NOT support this. Instead, use __declspec
- multiple times.
- */
- { "dllimport", 0, 0, true, false, false, NULL },
- { "dllexport", 0, 0, true, false, false, NULL },
- { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
-#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
- { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
- { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
- { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
-#endif
- { NULL, 0, 0, false, false, false, NULL }
-};
+ default:
+ break;
+ }
-/* Handle an attribute requiring a FUNCTION_DECL;
- arguments as in struct attribute_spec.handler. */
-static tree
-arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
- int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+ return -1;
+}
+
+/* Return true if PCS_VARIANT should use VFP registers. */
+static bool
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
- if (TREE_CODE (*node) != FUNCTION_DECL)
+ if (pcs_variant == ARM_PCS_AAPCS_VFP)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
+ static bool seen_thumb1_vfp = false;
+
+ if (TARGET_THUMB1 && !seen_thumb1_vfp)
+ {
+ sorry ("Thumb-1 hard-float VFP ABI");
+ /* sorry() is not immediately fatal, so only display this once. */
+ seen_thumb1_vfp = true;
+ }
+
+ return true;
}
- return NULL_TREE;
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
+ return false;
+
+ return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
+ (TARGET_VFP_DOUBLE || !is_double));
}
-/* Handle an "interrupt" or "isr" attribute;
- arguments as in struct attribute_spec.handler. */
-static tree
-arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
- bool *no_add_attrs)
+static bool
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type,
+ enum machine_mode *base_mode, int *count)
{
- if (DECL_P (*node))
+ enum machine_mode new_mode = VOIDmode;
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
{
- if (TREE_CODE (*node) != FUNCTION_DECL)
- {
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- }
- /* FIXME: the argument if any is checked for type attributes;
- should it be checked for decl ones? */
+ *count = 1;
+ new_mode = mode;
}
- else
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
{
- if (TREE_CODE (*node) == FUNCTION_TYPE
- || TREE_CODE (*node) == METHOD_TYPE)
- {
- if (arm_isr_value (args) == ARM_FT_UNKNOWN)
- {
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- }
- }
- else if (TREE_CODE (*node) == POINTER_TYPE
- && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
- || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
- && arm_isr_value (args) != ARM_FT_UNKNOWN)
- {
- *node = build_variant_type_copy (*node);
- TREE_TYPE (*node) = build_type_attribute_variant
- (TREE_TYPE (*node),
- tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
- *no_add_attrs = true;
- }
+ *count = 2;
+ new_mode = (mode == DCmode ? DFmode : SFmode);
+ }
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
+ {
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
+
+ if (ag_count > 0 && ag_count <= 4)
+ *count = ag_count;
else
- {
- /* Possibly pass this attribute on from the type to a decl. */
- if (flags & ((int) ATTR_FLAG_DECL_NEXT
- | (int) ATTR_FLAG_FUNCTION_NEXT
- | (int) ATTR_FLAG_ARRAY_NEXT))
- {
- *no_add_attrs = true;
- return tree_cons (name, args, NULL_TREE);
- }
- else
- {
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
- }
- }
+ return false;
}
+ else
+ return false;
- return NULL_TREE;
+
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
+ return false;
+
+ *base_mode = new_mode;
+ return true;
}
-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
-/* Handle the "notshared" attribute. This attribute is another way of
- requesting hidden visibility. ARM's compiler supports
- "__declspec(notshared)"; we support the same thing via an
- attribute. */
+static bool
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type)
+{
+ int count ATTRIBUTE_UNUSED;
+ enum machine_mode ag_mode ATTRIBUTE_UNUSED;
-static tree
-arm_handle_notshared_attribute (tree *node,
- tree name ATTRIBUTE_UNUSED,
- tree args ATTRIBUTE_UNUSED,
- int flags ATTRIBUTE_UNUSED,
- bool *no_add_attrs)
+ if (!use_vfp_abi (pcs_variant, false))
+ return false;
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
+}
+
+static bool
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
{
- tree decl = TYPE_NAME (*node);
+ if (!use_vfp_abi (pcum->pcs_variant, false))
+ return false;
- if (decl)
- {
- DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
- DECL_VISIBILITY_SPECIFIED (decl) = 1;
- *no_add_attrs = false;
- }
- return NULL_TREE;
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
+ &pcum->aapcs_vfp_rmode,
+ &pcum->aapcs_vfp_rcount);
}
-#endif
-/* Return 0 if the attributes for two types are incompatible, 1 if they
- are compatible, and 2 if they are nearly compatible (which causes a
- warning to be generated). */
-static int
-arm_comp_type_attributes (const_tree type1, const_tree type2)
+static bool
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
{
- int l1, l2, s1, s2;
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
+ int regno;
+
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
+ {
+ pcum->aapcs_vfp_reg_alloc = mask << regno;
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int i;
+ int rcount = pcum->aapcs_vfp_rcount;
+ int rshift = shift;
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+ rtx par;
+ if (!TARGET_NEON)
+ {
+ /* Avoid using unsupported vector modes. */
+ if (rmode == V2SImode)
+ rmode = DImode;
+ else if (rmode == V4SImode)
+ {
+ rmode = DImode;
+ rcount *= 2;
+ rshift /= 2;
+ }
+ }
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
+ for (i = 0; i < rcount; i++)
+ {
+ rtx tmp = gen_rtx_REG (rmode,
+ FIRST_VFP_REGNUM + regno + i * rshift);
+ tmp = gen_rtx_EXPR_LIST
+ (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (rmode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
- /* Check for mismatch of non-default calling convention. */
- if (TREE_CODE (type1) != FUNCTION_TYPE)
- return 1;
+ pcum->aapcs_reg = par;
+ }
+ else
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
+ return true;
+ }
+ return false;
+}
- /* Check for mismatched call attributes. */
- l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
- l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
- s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
- s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+static rtx
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ if (!use_vfp_abi (pcs_variant, false))
+ return false;
- /* Only bother to check if an attribute is defined. */
- if (l1 | l2 | s1 | s2)
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
{
- /* If one type has an attribute, the other must have the same attribute. */
- if ((l1 != l2) || (s1 != s2))
- return 0;
+ int count;
+ enum machine_mode ag_mode;
+ int i;
+ rtx par;
+ int shift;
+
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
- /* Disallow mixed attributes. */
- if ((l1 & s2) || (l2 & s1))
- return 0;
- }
+ if (!TARGET_NEON)
+ {
+ if (ag_mode == V2SImode)
+ ag_mode = DImode;
+ else if (ag_mode == V4SImode)
+ {
+ ag_mode = DImode;
+ count *= 2;
+ }
+ }
+ shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
+ for (i = 0; i < count; i++)
+ {
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
- /* Check for mismatched ISR attribute. */
- l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
- if (! l1)
- l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
- l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
- if (! l2)
- l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
- if (l1 != l2)
- return 0;
+ return par;
+ }
- return 1;
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
-/* Assigns default attributes to newly defined type. This is used to
- set short_call/long_call attributes for function types of
- functions defined inside corresponding #pragma scopes. */
static void
-arm_set_default_type_attributes (tree type)
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
+ pcum->aapcs_vfp_reg_alloc = 0;
+ return;
+}
+
+#define AAPCS_CP(X) \
+ { \
+ aapcs_ ## X ## _cum_init, \
+ aapcs_ ## X ## _is_call_candidate, \
+ aapcs_ ## X ## _allocate, \
+ aapcs_ ## X ## _is_return_candidate, \
+ aapcs_ ## X ## _allocate_return_reg, \
+ aapcs_ ## X ## _advance \
+ }
+
+/* Table of co-processors that can be used to pass arguments in
+ registers. Idealy no arugment should be a candidate for more than
+ one co-processor table entry, but the table is processed in order
+ and stops after the first match. If that entry then fails to put
+ the argument into a co-processor register, the argument will go on
+ the stack. */
+static struct
+{
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
+
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is
+ BLKmode) is a candidate for this co-processor's registers; this
+ function should ignore any position-dependent state in
+ CUMULATIVE_ARGS and only use call-type dependent information. */
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if the argument does get a co-processor register; it
+ should set aapcs_reg to an RTX of the register allocated as is
+ required for a return from FUNCTION_ARG. */
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if a result of mode MODE (or type TYPE if MODE is
+ BLKmode) is can be returned in this co-processor's registers. */
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Allocate and return an RTX element to hold the return type of a
+ call, this routine must not fail and will only be called if
+ is_return_candidate returned true with the same parameters. */
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Finish processing this argument and prepare to start processing
+ the next one. */
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
+ {
+ AAPCS_CP(vfp)
+ };
+
+#undef AAPCS_CP
+
+static int
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
{
- /* Add __attribute__ ((long_call)) to all functions, when
- inside #pragma long_calls or __attribute__ ((short_call)),
- when inside #pragma no_long_calls. */
- if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
- {
- tree type_attr_list, attr_name;
- type_attr_list = TYPE_ATTRIBUTES (type);
+ int i;
- if (arm_pragma_long_calls == LONG)
- attr_name = get_identifier ("long_call");
- else if (arm_pragma_long_calls == SHORT)
- attr_name = get_identifier ("short_call");
- else
- return;
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
+ return i;
- type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
- TYPE_ATTRIBUTES (type) = type_attr_list;
- }
+ return -1;
}
-\f
-/* Return true if DECL is known to be linked into section SECTION. */
-static bool
-arm_function_in_section_p (tree decl, section *section)
+static int
+aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
- /* We can only be certain about functions defined in the same
- compilation unit. */
- if (!TREE_STATIC (decl))
- return false;
-
- /* Make sure that SYMBOL always binds to the definition in this
- compilation unit. */
- if (!targetm.binds_local_p (decl))
- return false;
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
- /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
- if (!DECL_SECTION_NAME (decl))
+ if (fntype)
{
- /* Only cater for unit-at-a-time mode, where we know that the user
- cannot later specify a section for DECL. */
- if (!flag_unit_at_a_time)
- return false;
+ const_tree fndecl = NULL_TREE;
- /* Make sure that we will not create a unique section for DECL. */
- if (flag_function_sections || DECL_ONE_ONLY (decl))
- return false;
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
}
+ else
+ pcs_variant = arm_pcs_default;
- return function_section (decl) == section;
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
+ TYPE_MODE (type),
+ type))
+ return i;
+ }
+ return -1;
}
-/* Return nonzero if a 32-bit "long_call" should be generated for
- a call from the current function to DECL. We generate a long_call
- if the function:
+static rtx
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+ const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+ int unsignedp ATTRIBUTE_UNUSED;
- a. has an __attribute__((long call))
- or b. is within the scope of a #pragma long_calls
- or c. the -mlong-calls command line switch has been specified
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
- However we do not generate a long call if the function:
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
- d. has an __attribute__ ((short_call))
- or e. is inside the scope of a #pragma no_long_calls
- or f. is defined in the same section as the current function. */
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
-bool
-arm_is_long_call_p (tree decl)
-{
- tree attrs;
+ /* Promote integer types. */
+ if (type && INTEGRAL_TYPE_P (type))
+ mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
- if (!decl)
- return TARGET_LONG_CALLS;
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
- attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
- if (lookup_attribute ("short_call", attrs))
- return false;
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
+ type))
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
+ mode, type);
+ }
- /* For "f", be conservative, and only cater for cases in which the
- whole of the current function is placed in the same section. */
- if (!flag_reorder_blocks_and_partition
- && arm_function_in_section_p (decl, current_function_section ()))
- return false;
+ /* Promotes small structs returned in a register to full-word size
+ for big-endian AAPCS. */
+ if (type && arm_return_in_msb (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
- if (lookup_attribute ("long_call", attrs))
- return true;
+ return gen_rtx_REG (mode, R0_REGNUM);
+}
- return TARGET_LONG_CALLS;
+rtx
+aapcs_libcall_value (enum machine_mode mode)
+{
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
-/* Return nonzero if it is ok to make a tail-call to DECL. */
-static bool
-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+/* Lay out a function argument using the AAPCS rules. The rule
+ numbers referred to here are those in the AAPCS. */
+static void
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
{
- unsigned long func_type;
+ int nregs, nregs2;
+ int ncrn;
- if (cfun->machine->sibcall_blocked)
- return false;
+ /* We only need to do this once per argument. */
+ if (pcum->aapcs_arg_processed)
+ return;
- /* Never tailcall something for which we have no decl, or if we
- are in Thumb mode. */
- if (decl == NULL || TARGET_THUMB)
- return false;
+ pcum->aapcs_arg_processed = true;
- /* The PIC register is live on entry to VxWorks PLT entries, so we
- must make the call before restoring the PIC register. */
- if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
- return false;
+ /* Special case: if named is false then we are handling an incoming
+ anonymous argument which is on the stack. */
+ if (!named)
+ return;
+
+ /* Is this a potential co-processor register candidate? */
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int slot = aapcs_select_call_coproc (pcum, mode, type);
+ pcum->aapcs_cprc_slot = slot;
- /* Cannot tail-call to long calls, since these are out of range of
- a branch instruction. */
- if (arm_is_long_call_p (decl))
- return false;
+ /* We don't have to apply any of the rules from part B of the
+ preparation phase, these are handled elsewhere in the
+ compiler. */
- /* If we are interworking and the function is not declared static
- then we can't tail-call it unless we know that it exists in this
- compilation unit (since it might be a Thumb routine). */
- if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
- return false;
+ if (slot >= 0)
+ {
+ /* A Co-processor register candidate goes either in its own
+ class of registers or on the stack. */
+ if (!pcum->aapcs_cprc_failed[slot])
+ {
+ /* C1.cp - Try to allocate the argument to co-processor
+ registers. */
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
+ return;
+
+ /* C2.cp - Put the argument on the stack and note that we
+ can't assign any more candidates in this slot. We also
+ need to note that we have allocated stack space, so that
+ we won't later try to split a non-cprc candidate between
+ core registers and the stack. */
+ pcum->aapcs_cprc_failed[slot] = true;
+ pcum->can_split = false;
+ }
- func_type = arm_current_func_type ();
- /* Never tailcall from an ISR routine - it needs a special exit sequence. */
- if (IS_INTERRUPT (func_type))
- return false;
+ /* We didn't get a register, so this argument goes on the
+ stack. */
+ gcc_assert (pcum->can_split == false);
+ return;
+ }
+ }
- /* Never tailcall if function may be called with a misaligned SP. */
- if (IS_STACKALIGN (func_type))
- return false;
+ /* C3 - For double-word aligned arguments, round the NCRN up to the
+ next even number. */
+ ncrn = pcum->aapcs_ncrn;
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
+ ncrn++;
- /* Everything else is ok. */
- return true;
-}
+ nregs = ARM_NUM_REGS2(mode, type);
-\f
-/* Addressing mode support functions. */
+ /* Sigh, this test should really assert that nregs > 0, but a GCC
+ extension allows empty structs and then gives them empty size; it
+ then allows such a structure to be passed by value. For some of
+ the code below we have to pretend that such an argument has
+ non-zero size so that we 'locate' it correctly either in
+ registers or on the stack. */
+ gcc_assert (nregs >= 0);
-/* Return nonzero if X is a legitimate immediate operand when compiling
- for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
-int
-legitimate_pic_operand_p (rtx x)
-{
- if (GET_CODE (x) == SYMBOL_REF
- || (GET_CODE (x) == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
- return 0;
+ nregs2 = nregs ? nregs : 1;
- return 1;
-}
+ /* C4 - Argument fits entirely in core registers. */
+ if (ncrn + nregs2 <= NUM_ARG_REGS)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = ncrn + nregs;
+ return;
+ }
-/* Record that the current function needs a PIC register. Initialize
- cfun->machine->pic_reg if we have not already done so. */
+ /* C5 - Some core registers left and there are no arguments already
+ on the stack: split this argument between the remaining core
+ registers and the stack. */
+ if (ncrn < NUM_ARG_REGS && pcum->can_split)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
+ return;
+ }
-static void
-require_pic_register (void)
+ /* C6 - NCRN is set to 4. */
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+
+ /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
+ return;
+}
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is NULL. */
+void
+arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
+ rtx libname,
+ tree fndecl ATTRIBUTE_UNUSED)
{
- /* A lot of the logic here is made obscure by the fact that this
- routine gets called as part of the rtx cost estimation process.
- We don't want those calls to affect any assumptions about the real
- function; and further, we can't call entry_of_function() until we
- start the real expansion process. */
- if (!crtl->uses_pic_offset_table)
+ /* Long call handling. */
+ if (fntype)
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ else
+ pcum->pcs_variant = arm_pcs_default;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
{
- gcc_assert (can_create_pseudo_p ());
- if (arm_pic_register != INVALID_REGNUM)
+ if (arm_libcall_uses_aapcs_base (libname))
+ pcum->pcs_variant = ARM_PCS_AAPCS;
+
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_cprc_slot = -1;
+ pcum->can_split = true;
+
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
{
- cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
+ int i;
- /* Play games to avoid marking the function as needing pic
- if we are being called as part of the cost-estimation
- process. */
- if (current_ir_type () != IR_GIMPLE)
- crtl->uses_pic_offset_table = 1;
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ {
+ pcum->aapcs_cprc_failed[i] = false;
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
+ }
}
- else
- {
- rtx seq;
+ return;
+ }
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ /* Legacy ABIs */
- /* Play games to avoid marking the function as needing pic
- if we are being called as part of the cost-estimation
- process. */
- if (current_ir_type () != IR_GIMPLE)
- {
- crtl->uses_pic_offset_table = 1;
- start_sequence ();
+ /* On the ARM, the offset starts at 0. */
+ pcum->nregs = 0;
+ pcum->iwmmxt_nregs = 0;
+ pcum->can_split = true;
- arm_load_pic_register (0UL);
+ /* Varargs vectors are treated the same as long long.
+ named_count avoids having to change the way arm handles 'named' */
+ pcum->named_count = 0;
+ pcum->nargs = 0;
- seq = get_insns ();
- end_sequence ();
- emit_insn_after (seq, entry_of_function ());
- }
- }
+ if (TARGET_REALLY_IWMMXT && fntype)
+ {
+ tree fn_arg;
+
+ for (fn_arg = TYPE_ARG_TYPES (fntype);
+ fn_arg;
+ fn_arg = TREE_CHAIN (fn_arg))
+ pcum->named_count += 1;
+
+ if (! pcum->named_count)
+ pcum->named_count = INT_MAX;
}
}
-rtx
-legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+
+/* Return true if mode/type need doubleword alignment. */
+bool
+arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
- if (GET_CODE (orig) == SYMBOL_REF
- || GET_CODE (orig) == LABEL_REF)
- {
- rtx pic_ref, address;
- rtx insn;
- int subregs = 0;
+ return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
+ || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
+}
- /* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
- if (reg == 0)
- {
- gcc_assert (can_create_pseudo_p ());
- reg = gen_reg_rtx (Pmode);
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
- subregs = 1;
- }
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
- if (subregs)
- address = gen_reg_rtx (Pmode);
- else
- address = reg;
+ On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
+ other arguments are passed on the stack. If (NAMED == 0) (which happens
+ only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
+ defined), say it is passed in the stack (function_prologue will
+ indeed make it pass in the stack if necessary). */
- if (TARGET_ARM)
- emit_insn (gen_pic_load_addr_arm (address, orig));
- else if (TARGET_THUMB2)
- emit_insn (gen_pic_load_addr_thumb2 (address, orig));
- else /* TARGET_THUMB1 */
- emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+static rtx
+arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int nregs;
- /* VxWorks does not impose a fixed gap between segments; the run-time
- gap can be different from the object-file gap. We therefore can't
- use GOTOFF unless we are absolutely sure that the symbol is in the
- same segment as the GOT. Unfortunately, the flexibility of linker
- scripts means that we can't be sure of that in general, so assume
- that GOTOFF is never valid on VxWorks. */
- if ((GET_CODE (orig) == LABEL_REF
- || (GET_CODE (orig) == SYMBOL_REF &&
- SYMBOL_REF_LOCAL_P (orig)))
- && NEED_GOT_RELOC
- && !TARGET_VXWORKS_RTP)
- pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of
+ a call insn (op3 of a call_value insn). */
+ if (mode == VOIDmode)
+ return const0_rtx;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_reg;
+ }
+
+ /* Varargs vectors are treated the same as long long.
+ named_count avoids having to change the way arm handles 'named' */
+ if (TARGET_IWMMXT_ABI
+ && arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs + 1)
+ {
+ if (pcum->iwmmxt_nregs <= 9)
+ return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
else
{
- pic_ref = gen_const_mem (Pmode,
- gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
- address));
+ pcum->can_split = false;
+ return NULL_RTX;
}
+ }
- insn = emit_move_insn (reg, pic_ref);
+ /* Put doubleword aligned quantities in even register pairs. */
+ if (pcum->nregs & 1
+ && ARM_DOUBLEWORD_ALIGN
+ && arm_needs_doubleword_align (mode, type))
+ pcum->nregs++;
- /* Put a REG_EQUAL note on this insn, so that it can be optimized
- by loop. */
- set_unique_reg_note (insn, REG_EQUAL, orig);
+ /* Only allow splitting an arg between regs and memory if all preceding
+ args were allocated to regs. For args passed by reference we only count
+ the reference pointer. */
+ if (pcum->can_split)
+ nregs = 1;
+ else
+ nregs = ARM_NUM_REGS2 (mode, type);
- return reg;
- }
- else if (GET_CODE (orig) == CONST)
- {
- rtx base, offset;
+ if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
+ return NULL_RTX;
- if (GET_CODE (XEXP (orig, 0)) == PLUS
- && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
- return orig;
+ return gen_rtx_REG (mode, pcum->nregs);
+}
- if (GET_CODE (XEXP (orig, 0)) == UNSPEC
- && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
- return orig;
+static int
+arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int nregs = pcum->nregs;
- if (reg == 0)
- {
- gcc_assert (can_create_pseudo_p ());
- reg = gen_reg_rtx (Pmode);
- }
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_partial;
+ }
- gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+ if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
+ return 0;
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
- offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
- base == reg ? 0 : reg);
+ if (NUM_ARG_REGS > nregs
+ && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
+ && pcum->can_split)
+ return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
- if (GET_CODE (offset) == CONST_INT)
- {
- /* The base register doesn't really matter, we only want to
- test the index for the appropriate mode. */
- if (!arm_legitimate_index_p (mode, offset, SET, 0))
- {
- gcc_assert (can_create_pseudo_p ());
- offset = force_reg (Pmode, offset);
- }
+ return 0;
+}
- if (GET_CODE (offset) == CONST_INT)
- return plus_constant (base, INTVAL (offset));
- }
+/* Update the data in PCUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
- if (GET_MODE_SIZE (mode) > 4
- && (GET_MODE_CLASS (mode) == MODE_INT
- || TARGET_SOFT_FLOAT))
+static void
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+
+ if (pcum->aapcs_cprc_slot >= 0)
{
- emit_insn (gen_addsi3 (reg, base, offset));
- return reg;
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
+ type);
+ pcum->aapcs_cprc_slot = -1;
}
- return gen_rtx_PLUS (Pmode, base, offset);
+ /* Generic stuff. */
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ }
+ else
+ {
+ pcum->nargs += 1;
+ if (arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs
+ && TARGET_IWMMXT_ABI)
+ pcum->iwmmxt_nregs += 1;
+ else
+ pcum->nregs += ARM_NUM_REGS2 (mode, type);
}
-
- return orig;
}
+/* Variable sized types are passed by reference. This is a GCC
+ extension to the ARM ABI. */
-/* Find a spare register to use during the prolog of a function. */
-
-static int
-thumb_find_work_register (unsigned long pushed_regs_mask)
+static bool
+arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
{
- int reg;
-
- /* Check the argument registers first as these are call-used. The
- register allocation order means that sometimes r3 might be used
- but earlier argument registers might not, so check them all. */
- for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
- if (!df_regs_ever_live_p (reg))
- return reg;
-
- /* Before going on to check the call-saved registers we can try a couple
- more ways of deducing that r3 is available. The first is when we are
- pushing anonymous arguments onto the stack and we have less than 4
- registers worth of fixed arguments(*). In this case r3 will be part of
- the variable argument list and so we can be sure that it will be
- pushed right at the start of the function. Hence it will be available
- for the rest of the prologue.
- (*): ie crtl->args.pretend_args_size is greater than 0. */
- if (cfun->machine->uses_anonymous_args
- && crtl->args.pretend_args_size > 0)
- return LAST_ARG_REGNUM;
-
- /* The other case is when we have fixed arguments but less than 4 registers
- worth. In this case r3 might be used in the body of the function, but
- it is not being used to convey an argument into the function. In theory
- we could just check crtl->args.size to see how many bytes are
- being passed in argument registers, but it seems that it is unreliable.
- Sometimes it will have the value 0 when in fact arguments are being
- passed. (See testcase execute/20021111-1.c for an example). So we also
- check the args_info.nregs field as well. The problem with this field is
- that it makes no allowances for arguments that are passed to the
- function but which are not used. Hence we could miss an opportunity
- when a function has an unused argument in r3. But it is better to be
- safe than to be sorry. */
- if (! cfun->machine->uses_anonymous_args
- && crtl->args.size >= 0
- && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
- && crtl->args.info.nregs < 4)
- return LAST_ARG_REGNUM;
-
- /* Otherwise look for a call-saved register that is going to be pushed. */
- for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
- if (pushed_regs_mask & (1 << reg))
- return reg;
-
- if (TARGET_THUMB2)
- {
- /* Thumb-2 can use high regs. */
- for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
- if (pushed_regs_mask & (1 << reg))
- return reg;
- }
- /* Something went wrong - thumb_compute_save_reg_mask()
- should have arranged for a suitable register to be pushed. */
- gcc_unreachable ();
+ return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
+\f
+/* Encode the current state of the #pragma [no_]long_calls. */
+typedef enum
+{
+ OFF, /* No #pragma [no_]long_calls is in effect. */
+ LONG, /* #pragma long_calls is in effect. */
+ SHORT /* #pragma no_long_calls is in effect. */
+} arm_pragma_enum;
-static GTY(()) int pic_labelno;
-
-/* Generate code to load the PIC register. In thumb mode SCRATCH is a
- low register. */
+static arm_pragma_enum arm_pragma_long_calls = OFF;
void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
- rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
- rtx global_offset_table;
-
- if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
- return;
+ arm_pragma_long_calls = LONG;
+}
- gcc_assert (flag_pic);
+void
+arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ arm_pragma_long_calls = SHORT;
+}
- pic_reg = cfun->machine->pic_reg;
- if (TARGET_VXWORKS_RTP)
+void
+arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ arm_pragma_long_calls = OFF;
+}
+\f
+/* Handle an attribute requiring a FUNCTION_DECL;
+ arguments as in struct attribute_spec.handler. */
+static tree
+arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
{
- pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
- pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
- emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
+ return NULL_TREE;
+}
- pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
- emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
+/* Handle an "interrupt" or "isr" attribute;
+ arguments as in struct attribute_spec.handler. */
+static tree
+arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
+ bool *no_add_attrs)
+{
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ /* FIXME: the argument if any is checked for type attributes;
+ should it be checked for decl ones? */
}
else
{
- /* We use an UNSPEC rather than a LABEL_REF because this label
- never appears in the code stream. */
-
- labelno = GEN_INT (pic_labelno++);
- l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
- l1 = gen_rtx_CONST (VOIDmode, l1);
-
- global_offset_table
- = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
- /* On the ARM the PC register contains 'dot + 8' at the time of the
- addition, on the Thumb it is 'dot + 4'. */
- pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
- if (GOT_PCREL)
+ if (TREE_CODE (*node) == FUNCTION_TYPE
+ || TREE_CODE (*node) == METHOD_TYPE)
{
- pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
- pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
+ if (arm_isr_value (args) == ARM_FT_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
}
- else
- pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
-
- pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
- pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
-
- if (TARGET_ARM)
+ else if (TREE_CODE (*node) == POINTER_TYPE
+ && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
+ || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
+ && arm_isr_value (args) != ARM_FT_UNKNOWN)
{
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
+ *node = build_variant_type_copy (*node);
+ TREE_TYPE (*node) = build_type_attribute_variant
+ (TREE_TYPE (*node),
+ tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
+ *no_add_attrs = true;
}
- else if (TARGET_THUMB2)
+ else
{
- /* Thumb-2 only allows very limited access to the PC. Calculate the
- address in a temporary register. */
- if (arm_pic_register != INVALID_REGNUM)
+ /* Possibly pass this attribute on from the type to a decl. */
+ if (flags & ((int) ATTR_FLAG_DECL_NEXT
+ | (int) ATTR_FLAG_FUNCTION_NEXT
+ | (int) ATTR_FLAG_ARRAY_NEXT))
{
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
+ *no_add_attrs = true;
+ return tree_cons (name, args, NULL_TREE);
}
else
{
- gcc_assert (can_create_pseudo_p ());
- pic_tmp = gen_reg_rtx (Pmode);
- }
-
- emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
- emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
- }
- else /* TARGET_THUMB1 */
- {
- if (arm_pic_register != INVALID_REGNUM
- && REGNO (pic_reg) > LAST_LO_REGNUM)
- {
- /* We will have pushed the pic register, so we should always be
- able to find a work register. */
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
- emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
- emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
}
- else
- emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
}
}
- /* Need to emit this whether or not we obey regdecls,
- since setjmp/longjmp can cause life info to screw up. */
- emit_use (pic_reg);
+ return NULL_TREE;
}
-
-/* Return nonzero if X is valid as an ARM state addressing register. */
-static int
-arm_address_register_rtx_p (rtx x, int strict_p)
+/* Handle a "pcs" attribute; arguments as in struct
+ attribute_spec.handler. */
+static tree
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
- int regno;
-
- if (GET_CODE (x) != REG)
- return 0;
-
- regno = REGNO (x);
-
- if (strict_p)
- return ARM_REGNO_OK_FOR_BASE_P (regno);
-
- return (regno <= LAST_ARM_REGNUM
- || regno >= FIRST_PSEUDO_REGISTER
- || regno == FRAME_POINTER_REGNUM
- || regno == ARG_POINTER_REGNUM);
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
}
-/* Return TRUE if this rtx is the difference of a symbol and a label,
- and will reduce to a PC-relative relocation in the object file.
- Expressions like this can be left alone when generating PIC, rather
- than forced through the GOT. */
-static int
-pcrel_constant_p (rtx x)
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+/* Handle the "notshared" attribute. This attribute is another way of
+ requesting hidden visibility. ARM's compiler supports
+ "__declspec(notshared)"; we support the same thing via an
+ attribute. */
+
+static tree
+arm_handle_notshared_attribute (tree *node,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
{
- if (GET_CODE (x) == MINUS)
- return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
+ tree decl = TYPE_NAME (*node);
- return FALSE;
+ if (decl)
+ {
+ DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+ DECL_VISIBILITY_SPECIFIED (decl) = 1;
+ *no_add_attrs = false;
+ }
+ return NULL_TREE;
}
+#endif
-/* Return nonzero if X is a valid ARM state address operand. */
-int
-arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
- int strict_p)
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+static int
+arm_comp_type_attributes (const_tree type1, const_tree type2)
{
- bool use_ldrd;
- enum rtx_code code = GET_CODE (x);
+ int l1, l2, s1, s2;
- if (arm_address_register_rtx_p (x, strict_p))
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
return 1;
- use_ldrd = (TARGET_LDRD
- && (mode == DImode
- || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
-
- if (code == POST_INC || code == PRE_DEC
- || ((code == PRE_INC || code == POST_DEC)
- && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
- return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
- else if ((code == POST_MODIFY || code == PRE_MODIFY)
- && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
- && GET_CODE (XEXP (x, 1)) == PLUS
- && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
{
- rtx addend = XEXP (XEXP (x, 1), 1);
-
- /* Don't allow ldrd post increment by register because it's hard
- to fixup invalid register choices. */
- if (use_ldrd
- && GET_CODE (x) == POST_MODIFY
- && GET_CODE (addend) == REG)
+ /* If one type has an attribute, the other must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
return 0;
- return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
- && arm_legitimate_index_p (mode, addend, outer, strict_p));
+ /* Disallow mixed attributes. */
+ if ((l1 & s2) || (l2 & s1))
+ return 0;
}
- /* After reload constants split into minipools will have addresses
- from a LABEL_REF. */
- else if (reload_completed
- && (code == LABEL_REF
- || (code == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
- return 1;
-
- else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
+ /* Check for mismatched ISR attribute. */
+ l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
+ if (! l1)
+ l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
+ if (! l2)
+ l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
+ if (l1 != l2)
return 0;
- else if (code == PLUS)
- {
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
-
- return ((arm_address_register_rtx_p (xop0, strict_p)
- && arm_legitimate_index_p (mode, xop1, outer, strict_p))
- || (arm_address_register_rtx_p (xop1, strict_p)
- && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
- }
+ return 1;
+}
-#if 0
- /* Reload currently can't handle MINUS, so disable this for now */
- else if (GET_CODE (x) == MINUS)
+/* Assigns default attributes to newly defined type. This is used to
+ set short_call/long_call attributes for function types of
+ functions defined inside corresponding #pragma scopes. */
+static void
+arm_set_default_type_attributes (tree type)
+{
+ /* Add __attribute__ ((long_call)) to all functions, when
+ inside #pragma long_calls or __attribute__ ((short_call)),
+ when inside #pragma no_long_calls. */
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
{
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
-
- return (arm_address_register_rtx_p (xop0, strict_p)
- && arm_legitimate_index_p (mode, xop1, outer, strict_p));
- }
-#endif
+ tree type_attr_list, attr_name;
+ type_attr_list = TYPE_ATTRIBUTES (type);
- else if (GET_MODE_CLASS (mode) != MODE_FLOAT
- && code == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x)
- && ! (flag_pic
- && symbol_mentioned_p (get_pool_constant (x))
- && ! pcrel_constant_p (get_pool_constant (x))))
- return 1;
+ if (arm_pragma_long_calls == LONG)
+ attr_name = get_identifier ("long_call");
+ else if (arm_pragma_long_calls == SHORT)
+ attr_name = get_identifier ("short_call");
+ else
+ return;
- return 0;
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
+ TYPE_ATTRIBUTES (type) = type_attr_list;
+ }
}
+\f
+/* Return true if DECL is known to be linked into section SECTION. */
-/* Return nonzero if X is a valid Thumb-2 address operand. */
-int
-thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+static bool
+arm_function_in_section_p (tree decl, section *section)
{
- bool use_ldrd;
- enum rtx_code code = GET_CODE (x);
-
- if (arm_address_register_rtx_p (x, strict_p))
- return 1;
-
- use_ldrd = (TARGET_LDRD
- && (mode == DImode
- || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
+ /* We can only be certain about functions defined in the same
+ compilation unit. */
+ if (!TREE_STATIC (decl))
+ return false;
- if (code == POST_INC || code == PRE_DEC
- || ((code == PRE_INC || code == POST_DEC)
- && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
- return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+ /* Make sure that SYMBOL always binds to the definition in this
+ compilation unit. */
+ if (!targetm.binds_local_p (decl))
+ return false;
- else if ((code == POST_MODIFY || code == PRE_MODIFY)
- && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
- && GET_CODE (XEXP (x, 1)) == PLUS
- && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
+ if (!DECL_SECTION_NAME (decl))
{
- /* Thumb-2 only has autoincrement by constant. */
- rtx addend = XEXP (XEXP (x, 1), 1);
- HOST_WIDE_INT offset;
-
- if (GET_CODE (addend) != CONST_INT)
- return 0;
-
- offset = INTVAL(addend);
- if (GET_MODE_SIZE (mode) <= 4)
- return (offset > -256 && offset < 256);
-
- return (use_ldrd && offset > -1024 && offset < 1024
- && (offset & 3) == 0);
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_ONE_ONLY (decl))
+ return false;
}
- /* After reload constants split into minipools will have addresses
- from a LABEL_REF. */
- else if (reload_completed
- && (code == LABEL_REF
- || (code == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
- return 1;
-
- else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
- return 0;
+ return function_section (decl) == section;
+}
- else if (code == PLUS)
- {
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
+/* Return nonzero if a 32-bit "long_call" should be generated for
+ a call from the current function to DECL. We generate a long_call
+ if the function:
- return ((arm_address_register_rtx_p (xop0, strict_p)
- && thumb2_legitimate_index_p (mode, xop1, strict_p))
- || (arm_address_register_rtx_p (xop1, strict_p)
- && thumb2_legitimate_index_p (mode, xop0, strict_p)));
- }
+ a. has an __attribute__((long call))
+ or b. is within the scope of a #pragma long_calls
+ or c. the -mlong-calls command line switch has been specified
- else if (GET_MODE_CLASS (mode) != MODE_FLOAT
- && code == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x)
- && ! (flag_pic
- && symbol_mentioned_p (get_pool_constant (x))
- && ! pcrel_constant_p (get_pool_constant (x))))
- return 1;
+ However we do not generate a long call if the function:
- return 0;
-}
+ d. has an __attribute__ ((short_call))
+ or e. is inside the scope of a #pragma no_long_calls
+ or f. is defined in the same section as the current function. */
-/* Return nonzero if INDEX is valid for an address index operand in
- ARM state. */
-static int
-arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
- int strict_p)
+bool
+arm_is_long_call_p (tree decl)
{
- HOST_WIDE_INT range;
- enum rtx_code code = GET_CODE (index);
+ tree attrs;
- /* Standard coprocessor addressing modes. */
- if (TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_MAVERICK)
- && (GET_MODE_CLASS (mode) == MODE_FLOAT
- || (TARGET_MAVERICK && mode == DImode)))
- return (code == CONST_INT && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
+ if (!decl)
+ return TARGET_LONG_CALLS;
- if (TARGET_NEON
- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
- return (code == CONST_INT
- && INTVAL (index) < 1016
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
+ /* For "f", be conservative, and only cater for cases in which the
+ whole of the current function is placed in the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && arm_function_in_section_p (decl, current_function_section ()))
+ return false;
- if (arm_address_register_rtx_p (index, strict_p)
- && (GET_MODE_SIZE (mode) <= 4))
- return 1;
+ if (lookup_attribute ("long_call", attrs))
+ return true;
- if (mode == DImode || mode == DFmode)
- {
- if (code == CONST_INT)
- {
- HOST_WIDE_INT val = INTVAL (index);
+ return TARGET_LONG_CALLS;
+}
- if (TARGET_LDRD)
- return val > -256 && val < 256;
- else
- return val > -4096 && val < 4092;
- }
+/* Return nonzero if it is ok to make a tail-call to DECL. */
+static bool
+arm_function_ok_for_sibcall (tree decl, tree exp)
+{
+ unsigned long func_type;
- return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
- }
+ if (cfun->machine->sibcall_blocked)
+ return false;
- if (GET_MODE_SIZE (mode) <= 4
- && ! (arm_arch4
- && (mode == HImode
- || (mode == QImode && outer == SIGN_EXTEND))))
- {
- if (code == MULT)
- {
- rtx xiop0 = XEXP (index, 0);
- rtx xiop1 = XEXP (index, 1);
+ /* Never tailcall something for which we have no decl, or if we
+ are generating code for Thumb-1. */
+ if (decl == NULL || TARGET_THUMB1)
+ return false;
- return ((arm_address_register_rtx_p (xiop0, strict_p)
- && power_of_two_operand (xiop1, SImode))
- || (arm_address_register_rtx_p (xiop1, strict_p)
- && power_of_two_operand (xiop0, SImode)));
- }
- else if (code == LSHIFTRT || code == ASHIFTRT
- || code == ASHIFT || code == ROTATERT)
- {
- rtx op = XEXP (index, 1);
+ /* The PIC register is live on entry to VxWorks PLT entries, so we
+ must make the call before restoring the PIC register. */
+ if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
+ return false;
- return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
- && GET_CODE (op) == CONST_INT
- && INTVAL (op) > 0
- && INTVAL (op) <= 31);
- }
- }
+ /* Cannot tail-call to long calls, since these are out of range of
+ a branch instruction. */
+ if (arm_is_long_call_p (decl))
+ return false;
- /* For ARM v4 we may be doing a sign-extend operation during the
- load. */
- if (arm_arch4)
+ /* If we are interworking and the function is not declared static
+ then we can't tail-call it unless we know that it exists in this
+ compilation unit (since it might be a Thumb routine). */
+ if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
+ return false;
+
+ func_type = arm_current_func_type ();
+ /* Never tailcall from an ISR routine - it needs a special exit sequence. */
+ if (IS_INTERRUPT (func_type))
+ return false;
+
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
{
- if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
- range = 256;
- else
- range = 4096;
- }
- else
- range = (mode == HImode) ? 4095 : 4096;
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a VFP register but then need to transfer it to a core
+ register. */
+ rtx a, b;
- return (code == CONST_INT
- && INTVAL (index) < range
- && INTVAL (index) > -range);
-}
+ a = arm_function_value (TREE_TYPE (exp), decl, false);
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
-/* Return true if OP is a valid index scaling factor for Thumb-2 address
- index operand. i.e. 1, 2, 4 or 8. */
-static bool
-thumb2_index_mul_operand (rtx op)
-{
- HOST_WIDE_INT val;
-
- if (GET_CODE(op) != CONST_INT)
+ /* Never tailcall if function may be called with a misaligned SP. */
+ if (IS_STACKALIGN (func_type))
return false;
- val = INTVAL(op);
- return (val == 1 || val == 2 || val == 4 || val == 8);
+ /* Everything else is ok. */
+ return true;
}
-
-/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
-static int
-thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
-{
- enum rtx_code code = GET_CODE (index);
- /* ??? Combine arm and thumb2 coprocessor addressing modes. */
- /* Standard coprocessor addressing modes. */
- if (TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_MAVERICK)
- && (GET_MODE_CLASS (mode) == MODE_FLOAT
- || (TARGET_MAVERICK && mode == DImode)))
- return (code == CONST_INT && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
+\f
+/* Addressing mode support functions. */
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- {
- /* For DImode assume values will usually live in core regs
- and only allow LDRD addressing modes. */
- if (!TARGET_LDRD || mode != DImode)
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
- }
+/* Return nonzero if X is a legitimate immediate operand when compiling
+ for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
+int
+legitimate_pic_operand_p (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF
+ || (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
+ return 0;
- if (TARGET_NEON
- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
- return (code == CONST_INT
- && INTVAL (index) < 1016
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
+ return 1;
+}
- if (arm_address_register_rtx_p (index, strict_p)
- && (GET_MODE_SIZE (mode) <= 4))
- return 1;
+/* Record that the current function needs a PIC register. Initialize
+ cfun->machine->pic_reg if we have not already done so. */
- if (mode == DImode || mode == DFmode)
+static void
+require_pic_register (void)
+{
+ /* A lot of the logic here is made obscure by the fact that this
+ routine gets called as part of the rtx cost estimation process.
+ We don't want those calls to affect any assumptions about the real
+ function; and further, we can't call entry_of_function() until we
+ start the real expansion process. */
+ if (!crtl->uses_pic_offset_table)
{
- HOST_WIDE_INT val = INTVAL (index);
- /* ??? Can we assume ldrd for thumb2? */
- /* Thumb-2 ldrd only has reg+const addressing modes. */
- if (code != CONST_INT)
- return 0;
+ gcc_assert (can_create_pseudo_p ());
+ if (arm_pic_register != INVALID_REGNUM)
+ {
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
- /* ldrd supports offsets of +-1020.
- However the ldr fallback does not. */
- return val > -256 && val < 256 && (val & 3) == 0;
- }
+ /* Play games to avoid marking the function as needing pic
+ if we are being called as part of the cost-estimation
+ process. */
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
+ crtl->uses_pic_offset_table = 1;
+ }
+ else
+ {
+ rtx seq;
- if (code == MULT)
- {
- rtx xiop0 = XEXP (index, 0);
- rtx xiop1 = XEXP (index, 1);
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_reg_rtx (Pmode);
- return ((arm_address_register_rtx_p (xiop0, strict_p)
- && thumb2_index_mul_operand (xiop1))
- || (arm_address_register_rtx_p (xiop1, strict_p)
- && thumb2_index_mul_operand (xiop0)));
- }
- else if (code == ASHIFT)
- {
- rtx op = XEXP (index, 1);
+ /* Play games to avoid marking the function as needing pic
+ if we are being called as part of the cost-estimation
+ process. */
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
+ {
+ crtl->uses_pic_offset_table = 1;
+ start_sequence ();
- return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
- && GET_CODE (op) == CONST_INT
- && INTVAL (op) > 0
- && INTVAL (op) <= 3);
- }
+ arm_load_pic_register (0UL);
- return (code == CONST_INT
- && INTVAL (index) < 4096
- && INTVAL (index) > -256);
+ seq = get_insns ();
+ end_sequence ();
+ /* We can be called during expansion of PHI nodes, where
+ we can't yet emit instructions directly in the final
+ insn stream. Queue the insns on the entry edge, they will
+ be committed after everything else is expanded. */
+ insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
+ }
+ }
+ }
}
-/* Return nonzero if X is valid as a 16-bit Thumb state base register. */
-static int
-thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
- int regno;
+ if (GET_CODE (orig) == SYMBOL_REF
+ || GET_CODE (orig) == LABEL_REF)
+ {
+ rtx insn;
- if (GET_CODE (x) != REG)
- return 0;
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
- regno = REGNO (x);
+ /* VxWorks does not impose a fixed gap between segments; the run-time
+ gap can be different from the object-file gap. We therefore can't
+ use GOTOFF unless we are absolutely sure that the symbol is in the
+ same segment as the GOT. Unfortunately, the flexibility of linker
+ scripts means that we can't be sure of that in general, so assume
+ that GOTOFF is never valid on VxWorks. */
+ if ((GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF &&
+ SYMBOL_REF_LOCAL_P (orig)))
+ && NEED_GOT_RELOC
+ && !TARGET_VXWORKS_RTP)
+ insn = arm_pic_static_addr (orig, reg);
+ else
+ {
+ rtx pat;
+ rtx mem;
- if (strict_p)
- return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
+ /* If this function doesn't have a pic register, create one now. */
+ require_pic_register ();
- return (regno <= LAST_LO_REGNUM
- || regno > LAST_VIRTUAL_REGISTER
- || regno == FRAME_POINTER_REGNUM
- || (GET_MODE_SIZE (mode) >= 4
- && (regno == STACK_POINTER_REGNUM
- || regno >= FIRST_PSEUDO_REGISTER
- || x == hard_frame_pointer_rtx
- || x == arg_pointer_rtx)));
-}
+ pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
-/* Return nonzero if x is a legitimate index register. This is the case
- for any base register that can access a QImode object. */
-inline static int
-thumb1_index_register_rtx_p (rtx x, int strict_p)
-{
- return thumb1_base_register_rtx_p (x, QImode, strict_p);
-}
+ /* Make the MEM as close to a constant as possible. */
+ mem = SET_SRC (pat);
+ gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
+ MEM_READONLY_P (mem) = 1;
+ MEM_NOTRAP_P (mem) = 1;
-/* Return nonzero if x is a legitimate 16-bit Thumb-state address.
+ insn = emit_insn (pat);
+ }
- The AP may be eliminated to either the SP or the FP, so we use the
- least common denominator, e.g. SImode, and offsets from 0 to 64.
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized
+ by loop. */
+ set_unique_reg_note (insn, REG_EQUAL, orig);
- ??? Verify whether the above is the right approach.
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base, offset;
- ??? Also, the FP may be eliminated to the SP, so perhaps that
- needs special handling also.
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
+ return orig;
- ??? Look at how the mips16 port solves this problem. It probably uses
- better ways to solve some of these problems.
+ /* Handle the case where we have: const (UNSPEC_TLS). */
+ if (GET_CODE (XEXP (orig, 0)) == UNSPEC
+ && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
+ return orig;
- Although it is not incorrect, we don't accept QImode and HImode
- addresses based on the frame pointer or arg pointer until the
- reload pass starts. This is so that eliminating such addresses
- into stack based ones won't produce impossible code. */
-int
-thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+ /* Handle the case where we have:
+ const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
+ CONST_INT. */
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
+ {
+ gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
+ return orig;
+ }
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
+ base == reg ? 0 : reg);
+
+ if (GET_CODE (offset) == CONST_INT)
+ {
+ /* The base register doesn't really matter, we only want to
+ test the index for the appropriate mode. */
+ if (!arm_legitimate_index_p (mode, offset, SET, 0))
+ {
+ gcc_assert (can_create_pseudo_p ());
+ offset = force_reg (Pmode, offset);
+ }
+
+ if (GET_CODE (offset) == CONST_INT)
+ return plus_constant (base, INTVAL (offset));
+ }
+
+ if (GET_MODE_SIZE (mode) > 4
+ && (GET_MODE_CLASS (mode) == MODE_INT
+ || TARGET_SOFT_FLOAT))
+ {
+ emit_insn (gen_addsi3 (reg, base, offset));
+ return reg;
+ }
+
+ return gen_rtx_PLUS (Pmode, base, offset);
+ }
+
+ return orig;
+}
+
+
+/* Find a spare register to use during the prolog of a function. */
+
+static int
+thumb_find_work_register (unsigned long pushed_regs_mask)
+{
+ int reg;
+
+ /* Check the argument registers first as these are call-used. The
+ register allocation order means that sometimes r3 might be used
+ but earlier argument registers might not, so check them all. */
+ for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
+ if (!df_regs_ever_live_p (reg))
+ return reg;
+
+ /* Before going on to check the call-saved registers we can try a couple
+ more ways of deducing that r3 is available. The first is when we are
+ pushing anonymous arguments onto the stack and we have less than 4
+ registers worth of fixed arguments(*). In this case r3 will be part of
+ the variable argument list and so we can be sure that it will be
+ pushed right at the start of the function. Hence it will be available
+ for the rest of the prologue.
+ (*): ie crtl->args.pretend_args_size is greater than 0. */
+ if (cfun->machine->uses_anonymous_args
+ && crtl->args.pretend_args_size > 0)
+ return LAST_ARG_REGNUM;
+
+ /* The other case is when we have fixed arguments but less than 4 registers
+ worth. In this case r3 might be used in the body of the function, but
+ it is not being used to convey an argument into the function. In theory
+ we could just check crtl->args.size to see how many bytes are
+ being passed in argument registers, but it seems that it is unreliable.
+ Sometimes it will have the value 0 when in fact arguments are being
+ passed. (See testcase execute/20021111-1.c for an example). So we also
+ check the args_info.nregs field as well. The problem with this field is
+ that it makes no allowances for arguments that are passed to the
+ function but which are not used. Hence we could miss an opportunity
+ when a function has an unused argument in r3. But it is better to be
+ safe than to be sorry. */
+ if (! cfun->machine->uses_anonymous_args
+ && crtl->args.size >= 0
+ && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
+ && crtl->args.info.nregs < 4)
+ return LAST_ARG_REGNUM;
+
+ /* Otherwise look for a call-saved register that is going to be pushed. */
+ for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
+ if (pushed_regs_mask & (1 << reg))
+ return reg;
+
+ if (TARGET_THUMB2)
+ {
+ /* Thumb-2 can use high regs. */
+ for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
+ if (pushed_regs_mask & (1 << reg))
+ return reg;
+ }
+ /* Something went wrong - thumb_compute_save_reg_mask()
+ should have arranged for a suitable register to be pushed. */
+ gcc_unreachable ();
+}
+
+static GTY(()) int pic_labelno;
+
+/* Generate code to load the PIC register. In thumb mode SCRATCH is a
+ low register. */
+
+void
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+{
+ rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+
+ if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
+ return;
+
+ gcc_assert (flag_pic);
+
+ pic_reg = cfun->machine->pic_reg;
+ if (TARGET_VXWORKS_RTP)
+ {
+ pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+
+ emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
+
+ pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
+ }
+ else
+ {
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition, on the Thumb it is 'dot + 4'. */
+ pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
+ UNSPEC_GOTSYM_OFF);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+ if (TARGET_ARM)
+ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
+ else
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (arm_pic_register != INVALID_REGNUM
+ && REGNO (pic_reg) > LAST_LO_REGNUM)
+ {
+ /* We will have pushed the pic register, so we should always be
+ able to find a work register. */
+ pic_tmp = gen_rtx_REG (SImode,
+ thumb_find_work_register (saved_regs));
+ emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
+ emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
+ }
+ else
+ emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
+ }
+ }
+
+ /* Need to emit this whether or not we obey regdecls,
+ since setjmp/longjmp can cause life info to screw up. */
+ emit_use (pic_reg);
+}
+
+/* Generate code to load the address of a static var when flag_pic is set. */
+static rtx
+arm_pic_static_addr (rtx orig, rtx reg)
+{
+ rtx l1, labelno, offset_rtx, insn;
+
+ gcc_assert (flag_pic);
+
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition, on the Thumb it is 'dot + 4'. */
+ offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
+ UNSPEC_SYMBOL_OFFSET);
+ offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
+
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
+ if (TARGET_ARM)
+ insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
+ else
+ insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ }
+ else /* TARGET_THUMB1 */
+ {
+ emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
+ insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ }
+
+ return insn;
+}
+
+/* Return nonzero if X is valid as an ARM state addressing register. */
+static int
+arm_address_register_rtx_p (rtx x, int strict_p)
+{
+ int regno;
+
+ if (GET_CODE (x) != REG)
+ return 0;
+
+ regno = REGNO (x);
+
+ if (strict_p)
+ return ARM_REGNO_OK_FOR_BASE_P (regno);
+
+ return (regno <= LAST_ARM_REGNUM
+ || regno >= FIRST_PSEUDO_REGISTER
+ || regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM);
+}
+
+/* Return TRUE if this rtx is the difference of a symbol and a label,
+ and will reduce to a PC-relative relocation in the object file.
+ Expressions like this can be left alone when generating PIC, rather
+ than forced through the GOT. */
+static int
+pcrel_constant_p (rtx x)
+{
+ if (GET_CODE (x) == MINUS)
+ return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
+
+ return FALSE;
+}
+
+/* Return true if X will surely end up in an index register after next
+ splitting pass. */
+static bool
+will_be_in_index_register (const_rtx x)
+{
+ /* arm.md: calculate_pic_address will split this into a register. */
+ return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
+}
+
+/* Return nonzero if X is a valid ARM state address operand. */
+int
+arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+ int strict_p)
+{
+ bool use_ldrd;
+ enum rtx_code code = GET_CODE (x);
+
+ if (arm_address_register_rtx_p (x, strict_p))
+ return 1;
+
+ use_ldrd = (TARGET_LDRD
+ && (mode == DImode
+ || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
+
+ if (code == POST_INC || code == PRE_DEC
+ || ((code == PRE_INC || code == POST_DEC)
+ && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
+ return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+
+ else if ((code == POST_MODIFY || code == PRE_MODIFY)
+ && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
+ && GET_CODE (XEXP (x, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ {
+ rtx addend = XEXP (XEXP (x, 1), 1);
+
+ /* Don't allow ldrd post increment by register because it's hard
+ to fixup invalid register choices. */
+ if (use_ldrd
+ && GET_CODE (x) == POST_MODIFY
+ && GET_CODE (addend) == REG)
+ return 0;
+
+ return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
+ && arm_legitimate_index_p (mode, addend, outer, strict_p));
+ }
+
+ /* After reload constants split into minipools will have addresses
+ from a LABEL_REF. */
+ else if (reload_completed
+ && (code == LABEL_REF
+ || (code == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
+ return 0;
+
+ else if (code == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+ && ((GET_CODE(xop1) == CONST_INT
+ && arm_legitimate_index_p (mode, xop1, outer, strict_p))
+ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
+ }
+
+#if 0
+ /* Reload currently can't handle MINUS, so disable this for now */
+ else if (GET_CODE (x) == MINUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return (arm_address_register_rtx_p (xop0, strict_p)
+ && arm_legitimate_index_p (mode, xop1, outer, strict_p));
+ }
+#endif
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if X is a valid Thumb-2 address operand. */
+static int
+thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+{
+ bool use_ldrd;
+ enum rtx_code code = GET_CODE (x);
+
+ if (arm_address_register_rtx_p (x, strict_p))
+ return 1;
+
+ use_ldrd = (TARGET_LDRD
+ && (mode == DImode
+ || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
+
+ if (code == POST_INC || code == PRE_DEC
+ || ((code == PRE_INC || code == POST_DEC)
+ && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
+ return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+
+ else if ((code == POST_MODIFY || code == PRE_MODIFY)
+ && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
+ && GET_CODE (XEXP (x, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ {
+ /* Thumb-2 only has autoincrement by constant. */
+ rtx addend = XEXP (XEXP (x, 1), 1);
+ HOST_WIDE_INT offset;
+
+ if (GET_CODE (addend) != CONST_INT)
+ return 0;
+
+ offset = INTVAL(addend);
+ if (GET_MODE_SIZE (mode) <= 4)
+ return (offset > -256 && offset < 256);
+
+ return (use_ldrd && offset > -1024 && offset < 1024
+ && (offset & 3) == 0);
+ }
+
+ /* After reload constants split into minipools will have addresses
+ from a LABEL_REF. */
+ else if (reload_completed
+ && (code == LABEL_REF
+ || (code == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
+ return 0;
+
+ else if (code == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+ && (thumb2_legitimate_index_p (mode, xop1, strict_p)
+ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && thumb2_legitimate_index_p (mode, xop0, strict_p)));
+ }
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if INDEX is valid for an address index operand in
+ ARM state. */
+static int
+arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
+ int strict_p)
+{
+ HOST_WIDE_INT range;
+ enum rtx_code code = GET_CODE (index);
+
+ /* Standard coprocessor addressing modes. */
+ if (TARGET_HARD_FLOAT
+ && (TARGET_FPA || TARGET_MAVERICK)
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || (TARGET_MAVERICK && mode == DImode)))
+ return (code == CONST_INT && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (TARGET_NEON
+ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (arm_address_register_rtx_p (index, strict_p)
+ && (GET_MODE_SIZE (mode) <= 4))
+ return 1;
+
+ if (mode == DImode || mode == DFmode)
+ {
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+
+ if (TARGET_LDRD)
+ return val > -256 && val < 256;
+ else
+ return val > -4096 && val < 4092;
+ }
+
+ return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+ }
+
+ if (GET_MODE_SIZE (mode) <= 4
+ && ! (arm_arch4
+ && (mode == HImode
+ || mode == HFmode
+ || (mode == QImode && outer == SIGN_EXTEND))))
+ {
+ if (code == MULT)
+ {
+ rtx xiop0 = XEXP (index, 0);
+ rtx xiop1 = XEXP (index, 1);
+
+ return ((arm_address_register_rtx_p (xiop0, strict_p)
+ && power_of_two_operand (xiop1, SImode))
+ || (arm_address_register_rtx_p (xiop1, strict_p)
+ && power_of_two_operand (xiop0, SImode)));
+ }
+ else if (code == LSHIFTRT || code == ASHIFTRT
+ || code == ASHIFT || code == ROTATERT)
+ {
+ rtx op = XEXP (index, 1);
+
+ return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
+ && GET_CODE (op) == CONST_INT
+ && INTVAL (op) > 0
+ && INTVAL (op) <= 31);
+ }
+ }
+
+ /* For ARM v4 we may be doing a sign-extend operation during the
+ load. */
+ if (arm_arch4)
+ {
+ if (mode == HImode
+ || mode == HFmode
+ || (outer == SIGN_EXTEND && mode == QImode))
+ range = 256;
+ else
+ range = 4096;
+ }
+ else
+ range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
+
+ return (code == CONST_INT
+ && INTVAL (index) < range
+ && INTVAL (index) > -range);
+}
+
+/* Return true if OP is a valid index scaling factor for Thumb-2 address
+ index operand. i.e. 1, 2, 4 or 8. */
+static bool
+thumb2_index_mul_operand (rtx op)
+{
+ HOST_WIDE_INT val;
+
+ if (GET_CODE(op) != CONST_INT)
+ return false;
+
+ val = INTVAL(op);
+ return (val == 1 || val == 2 || val == 4 || val == 8);
+}
+
+/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
+static int
+thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
+{
+ enum rtx_code code = GET_CODE (index);
+
+ /* ??? Combine arm and thumb2 coprocessor addressing modes. */
+ /* Standard coprocessor addressing modes. */
+ if (TARGET_HARD_FLOAT
+ && (TARGET_FPA || TARGET_MAVERICK)
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || (TARGET_MAVERICK && mode == DImode)))
+ return (code == CONST_INT && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
+ {
+ /* For DImode assume values will usually live in core regs
+ and only allow LDRD addressing modes. */
+ if (!TARGET_LDRD || mode != DImode)
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+ }
+
+ if (TARGET_NEON
+ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (arm_address_register_rtx_p (index, strict_p)
+ && (GET_MODE_SIZE (mode) <= 4))
+ return 1;
+
+ if (mode == DImode || mode == DFmode)
+ {
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+ /* ??? Can we assume ldrd for thumb2? */
+ /* Thumb-2 ldrd only has reg+const addressing modes. */
+ /* ldrd supports offsets of +-1020.
+ However the ldr fallback does not. */
+ return val > -256 && val < 256 && (val & 3) == 0;
+ }
+ else
+ return 0;
+ }
+
+ if (code == MULT)
+ {
+ rtx xiop0 = XEXP (index, 0);
+ rtx xiop1 = XEXP (index, 1);
+
+ return ((arm_address_register_rtx_p (xiop0, strict_p)
+ && thumb2_index_mul_operand (xiop1))
+ || (arm_address_register_rtx_p (xiop1, strict_p)
+ && thumb2_index_mul_operand (xiop0)));
+ }
+ else if (code == ASHIFT)
+ {
+ rtx op = XEXP (index, 1);
+
+ return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
+ && GET_CODE (op) == CONST_INT
+ && INTVAL (op) > 0
+ && INTVAL (op) <= 3);
+ }
+
+ return (code == CONST_INT
+ && INTVAL (index) < 4096
+ && INTVAL (index) > -256);
+}
+
+/* Return nonzero if X is valid as a 16-bit Thumb state base register. */
+static int
+thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
+{
+ int regno;
+
+ if (GET_CODE (x) != REG)
+ return 0;
+
+ regno = REGNO (x);
+
+ if (strict_p)
+ return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
+
+ return (regno <= LAST_LO_REGNUM
+ || regno > LAST_VIRTUAL_REGISTER
+ || regno == FRAME_POINTER_REGNUM
+ || (GET_MODE_SIZE (mode) >= 4
+ && (regno == STACK_POINTER_REGNUM
+ || regno >= FIRST_PSEUDO_REGISTER
+ || x == hard_frame_pointer_rtx
+ || x == arg_pointer_rtx)));
+}
+
+/* Return nonzero if x is a legitimate index register. This is the case
+ for any base register that can access a QImode object. */
+inline static int
+thumb1_index_register_rtx_p (rtx x, int strict_p)
+{
+ return thumb1_base_register_rtx_p (x, QImode, strict_p);
+}
+
+/* Return nonzero if x is a legitimate 16-bit Thumb-state address.
+
+ The AP may be eliminated to either the SP or the FP, so we use the
+ least common denominator, e.g. SImode, and offsets from 0 to 64.
+
+ ??? Verify whether the above is the right approach.
+
+ ??? Also, the FP may be eliminated to the SP, so perhaps that
+ needs special handling also.
+
+ ??? Look at how the mips16 port solves this problem. It probably uses
+ better ways to solve some of these problems.
+
+ Although it is not incorrect, we don't accept QImode and HImode
+ addresses based on the frame pointer or arg pointer until the
+ reload pass starts. This is so that eliminating such addresses
+ into stack based ones won't produce impossible code. */
+static int
+thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+{
+ /* ??? Not clear if this is right. Experiment. */
+ if (GET_MODE_SIZE (mode) < 4
+ && !(reload_in_progress || reload_completed)
+ && (reg_mentioned_p (frame_pointer_rtx, x)
+ || reg_mentioned_p (arg_pointer_rtx, x)
+ || reg_mentioned_p (virtual_incoming_args_rtx, x)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, x)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
+ || reg_mentioned_p (virtual_stack_vars_rtx, x)))
+ return 0;
+
+ /* Accept any base register. SP only in SImode or larger. */
+ else if (thumb1_base_register_rtx_p (x, mode, strict_p))
+ return 1;
+
+ /* This is PC relative data before arm_reorg runs. */
+ else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
+ && GET_CODE (x) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
+ return 1;
+
+ /* This is PC relative data after arm_reorg runs. */
+ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
+ && reload_completed
+ && (GET_CODE (x) == LABEL_REF
+ || (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ /* Post-inc indexing only supported for SImode and larger. */
+ else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
+ && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
+ return 1;
+
+ else if (GET_CODE (x) == PLUS)
+ {
+ /* REG+REG address can be any two index registers. */
+ /* We disallow FRAME+REG addressing since we know that FRAME
+ will be replaced with STACK, and SP relative addressing only
+ permits SP+OFFSET. */
+ if (GET_MODE_SIZE (mode) <= 4
+ && XEXP (x, 0) != frame_pointer_rtx
+ && XEXP (x, 1) != frame_pointer_rtx
+ && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
+ && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
+ || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
+ return 1;
+
+ /* REG+const has 5-7 bit offset for non-SP registers. */
+ else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
+ || XEXP (x, 0) == arg_pointer_rtx)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
+ return 1;
+
+ /* REG+const has 10-bit offset for SP, but only SImode and
+ larger is supported. */
+ /* ??? Should probably check for DI/DFmode overflow here
+ just like GO_IF_LEGITIMATE_OFFSET does. */
+ else if (GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
+ && GET_MODE_SIZE (mode) >= 4
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) >= 0
+ && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
+ && (INTVAL (XEXP (x, 1)) & 3) == 0)
+ return 1;
+
+ else if (GET_CODE (XEXP (x, 0)) == REG
+ && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
+ || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
+ || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (XEXP (x, 0))
+ <= LAST_VIRTUAL_POINTER_REGISTER))
+ && GET_MODE_SIZE (mode) >= 4
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) & 3) == 0)
+ return 1;
+ }
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && GET_MODE_SIZE (mode) == 4
+ && GET_CODE (x) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if VAL can be used as an offset in a Thumb-state address
+ instruction of mode MODE. */
+int
+thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
+{
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ return val >= 0 && val < 32;
+
+ case 2:
+ return val >= 0 && val < 64 && (val & 1) == 0;
+
+ default:
+ return (val >= 0
+ && (val + GET_MODE_SIZE (mode)) <= 128
+ && (val & 3) == 0);
+ }
+}
+
+bool
+arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ if (TARGET_ARM)
+ return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
+ else if (TARGET_THUMB2)
+ return thumb2_legitimate_address_p (mode, x, strict_p);
+ else /* if (TARGET_THUMB1) */
+ return thumb1_legitimate_address_p (mode, x, strict_p);
+}
+
+/* Build the SYMBOL_REF for __tls_get_addr. */
+
+static GTY(()) rtx tls_get_addr_libfunc;
+
+static rtx
+get_tls_get_addr (void)
+{
+ if (!tls_get_addr_libfunc)
+ tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
+ return tls_get_addr_libfunc;
+}
+
+static rtx
+arm_load_tp (rtx target)
+{
+ if (!target)
+ target = gen_reg_rtx (SImode);
+
+ if (TARGET_HARD_TP)
+ {
+ /* Can return in any reg. */
+ emit_insn (gen_load_tp_hard (target));
+ }
+ else
+ {
+ /* Always returned in r0. Immediately copy the result into a pseudo,
+ otherwise other uses of r0 (e.g. setting up function arguments) may
+ clobber the value. */
+
+ rtx tmp;
+
+ emit_insn (gen_load_tp_soft ());
+
+ tmp = gen_rtx_REG (SImode, 0);
+ emit_move_insn (target, tmp);
+ }
+ return target;
+}
+
+static rtx
+load_tls_operand (rtx x, rtx reg)
+{
+ rtx tmp;
+
+ if (reg == NULL_RTX)
+ reg = gen_reg_rtx (SImode);
+
+ tmp = gen_rtx_CONST (SImode, x);
+
+ emit_move_insn (reg, tmp);
+
+ return reg;
+}
+
+static rtx
+arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
+{
+ rtx insns, label, labelno, sum;
+
+ start_sequence ();
+
+ labelno = GEN_INT (pic_labelno++);
+ label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ label = gen_rtx_CONST (VOIDmode, label);
+
+ sum = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (4, x, GEN_INT (reloc), label,
+ GEN_INT (TARGET_ARM ? 8 : 4)),
+ UNSPEC_TLS);
+ reg = load_tls_operand (sum, reg);
+
+ if (TARGET_ARM)
+ emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
+ else if (TARGET_THUMB2)
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ else /* TARGET_THUMB1 */
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+
+ *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
+ Pmode, 1, reg, Pmode);
+
+ insns = get_insns ();
+ end_sequence ();
+
+ return insns;
+}
+
+rtx
+legitimize_tls_address (rtx x, rtx reg)
+{
+ rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
+ unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, ret, x);
+ return dest;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LDM result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
+ UNSPEC_TLS);
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, ret, eqv);
+
+ /* Load the addend. */
+ addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
+ UNSPEC_TLS);
+ addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
+ return gen_rtx_PLUS (Pmode, dest, addend);
+
+ case TLS_MODEL_INITIAL_EXEC:
+ labelno = GEN_INT (pic_labelno++);
+ label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ label = gen_rtx_CONST (VOIDmode, label);
+ sum = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
+ GEN_INT (TARGET_ARM ? 8 : 4)),
+ UNSPEC_TLS);
+ reg = load_tls_operand (sum, reg);
+
+ if (TARGET_ARM)
+ emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
+ else if (TARGET_THUMB2)
+ emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
+ else
+ {
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ emit_move_insn (reg, gen_const_mem (SImode, reg));
+ }
+
+ tp = arm_load_tp (NULL_RTX);
+
+ return gen_rtx_PLUS (Pmode, tp, reg);
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = arm_load_tp (NULL_RTX);
+
+ reg = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (2, x, GEN_INT (TLS_LE32)),
+ UNSPEC_TLS);
+ reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
+
+ return gen_rtx_PLUS (Pmode, tp, reg);
+
+ default:
+ abort ();
+ }
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address. */
+rtx
+arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+{
+ if (!TARGET_ARM)
+ {
+ /* TODO: legitimize_address for Thumb2. */
+ if (TARGET_THUMB2)
+ return x;
+ return thumb_legitimize_address (x, orig_x, mode);
+ }
+
+ if (arm_tls_symbol_p (x))
+ return legitimize_tls_address (x, NULL_RTX);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
+ xop0 = force_reg (SImode, xop0);
+
+ if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
+ xop1 = force_reg (SImode, xop1);
+
+ if (ARM_BASE_REGISTER_RTX_P (xop0)
+ && GET_CODE (xop1) == CONST_INT)
+ {
+ HOST_WIDE_INT n, low_n;
+ rtx base_reg, val;
+ n = INTVAL (xop1);
+
+ /* VFP addressing modes actually allow greater offsets, but for
+ now we just stick with the lowest common denominator. */
+ if (mode == DImode
+ || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
+ {
+ low_n = n & 0x0f;
+ n &= ~0x0f;
+ if (low_n > 4)
+ {
+ n += 16;
+ low_n -= 16;
+ }
+ }
+ else
+ {
+ low_n = ((mode) == TImode ? 0
+ : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
+ n -= low_n;
+ }
+
+ base_reg = gen_reg_rtx (SImode);
+ val = force_operand (plus_constant (xop0, n), NULL_RTX);
+ emit_move_insn (base_reg, val);
+ x = plus_constant (base_reg, low_n);
+ }
+ else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
+ x = gen_rtx_PLUS (SImode, xop0, xop1);
+ }
+
+ /* XXX We don't allow MINUS any more -- see comment in
+ arm_legitimate_address_outer_p (). */
+ else if (GET_CODE (x) == MINUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (CONSTANT_P (xop0))
+ xop0 = force_reg (SImode, xop0);
+
+ if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
+ xop1 = force_reg (SImode, xop1);
+
+ if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
+ x = gen_rtx_MINUS (SImode, xop0, xop1);
+ }
+
+ /* Make sure to take full advantage of the pre-indexed addressing mode
+ with absolute addresses which often allows for the base register to
+ be factorized for multiple adjacent memory references, and it might
+ even allows for the mini pool to be avoided entirely. */
+ else if (GET_CODE (x) == CONST_INT && optimize > 0)
+ {
+ unsigned int bits;
+ HOST_WIDE_INT mask, base, index;
+ rtx base_reg;
+
+ /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
+ use a 8-bit index. So let's use a 12-bit index for SImode only and
+ hope that arm_gen_constant will enable ldrb to use more bits. */
+ bits = (mode == SImode) ? 12 : 8;
+ mask = (1 << bits) - 1;
+ base = INTVAL (x) & ~mask;
+ index = INTVAL (x) & mask;
+ if (bit_count (base & 0xffffffff) > (32 - bits)/2)
+ {
+ /* It'll most probably be more efficient to generate the base
+ with more bits set and use a negative index instead. */
+ base |= mask;
+ index -= mask;
+ }
+ base_reg = force_reg (SImode, GEN_INT (base));
+ x = plus_constant (base_reg, index);
+ }
+
+ if (flag_pic)
+ {
+ /* We need to find and carefully transform any SYMBOL and LABEL
+ references; so go back to the original address expression. */
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+
+ if (new_x != orig_x)
+ x = new_x;
+ }
+
+ return x;
+}
+
+
+/* Try machine-dependent ways of modifying an illegitimate Thumb address
+ to be legitimate. If we find one, return the new, valid address. */
+rtx
+thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
- /* ??? Not clear if this is right. Experiment. */
- if (GET_MODE_SIZE (mode) < 4
- && !(reload_in_progress || reload_completed)
- && (reg_mentioned_p (frame_pointer_rtx, x)
- || reg_mentioned_p (arg_pointer_rtx, x)
- || reg_mentioned_p (virtual_incoming_args_rtx, x)
- || reg_mentioned_p (virtual_outgoing_args_rtx, x)
- || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
- || reg_mentioned_p (virtual_stack_vars_rtx, x)))
- return 0;
+ if (arm_tls_symbol_p (x))
+ return legitimize_tls_address (x, NULL_RTX);
- /* Accept any base register. SP only in SImode or larger. */
- else if (thumb1_base_register_rtx_p (x, mode, strict_p))
- return 1;
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
+ || INTVAL (XEXP (x, 1)) < 0))
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+ HOST_WIDE_INT offset = INTVAL (xop1);
- /* This is PC relative data before arm_reorg runs. */
- else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
- && GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
- return 1;
+ /* Try and fold the offset into a biasing of the base register and
+ then offsetting that. Don't do this when optimizing for space
+ since it can cause too many CSEs. */
+ if (optimize_size && offset >= 0
+ && offset < 256 + 31 * GET_MODE_SIZE (mode))
+ {
+ HOST_WIDE_INT delta;
- /* This is PC relative data after arm_reorg runs. */
- else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
- && (GET_CODE (x) == LABEL_REF
- || (GET_CODE (x) == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
- return 1;
+ if (offset >= 256)
+ delta = offset - (256 - GET_MODE_SIZE (mode));
+ else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
+ delta = 31 * GET_MODE_SIZE (mode);
+ else
+ delta = offset & (~31 * GET_MODE_SIZE (mode));
- /* Post-inc indexing only supported for SImode and larger. */
- else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
- && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
- return 1;
+ xop0 = force_operand (plus_constant (xop0, offset - delta),
+ NULL_RTX);
+ x = plus_constant (xop0, delta);
+ }
+ else if (offset < 0 && offset > -256)
+ /* Small negative offsets are best done with a subtract before the
+ dereference, forcing these into a register normally takes two
+ instructions. */
+ x = force_operand (x, NULL_RTX);
+ else
+ {
+ /* For the remaining cases, force the constant into a register. */
+ xop1 = force_reg (SImode, xop1);
+ x = gen_rtx_PLUS (SImode, xop0, xop1);
+ }
+ }
+ else if (GET_CODE (x) == PLUS
+ && s_register_operand (XEXP (x, 1), SImode)
+ && !s_register_operand (XEXP (x, 0), SImode))
+ {
+ rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
- else if (GET_CODE (x) == PLUS)
+ x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
+ }
+
+ if (flag_pic)
{
- /* REG+REG address can be any two index registers. */
- /* We disallow FRAME+REG addressing since we know that FRAME
- will be replaced with STACK, and SP relative addressing only
- permits SP+OFFSET. */
- if (GET_MODE_SIZE (mode) <= 4
- && XEXP (x, 0) != frame_pointer_rtx
- && XEXP (x, 1) != frame_pointer_rtx
- && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
- && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
- return 1;
+ /* We need to find and carefully transform any SYMBOL and LABEL
+ references; so go back to the original address expression. */
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
- /* REG+const has 5-7 bit offset for non-SP registers. */
- else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
- || XEXP (x, 0) == arg_pointer_rtx)
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
- return 1;
+ if (new_x != orig_x)
+ x = new_x;
+ }
- /* REG+const has 10-bit offset for SP, but only SImode and
- larger is supported. */
- /* ??? Should probably check for DI/DFmode overflow here
- just like GO_IF_LEGITIMATE_OFFSET does. */
- else if (GET_CODE (XEXP (x, 0)) == REG
- && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
- && GET_MODE_SIZE (mode) >= 4
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && INTVAL (XEXP (x, 1)) >= 0
- && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
- && (INTVAL (XEXP (x, 1)) & 3) == 0)
- return 1;
+ return x;
+}
- else if (GET_CODE (XEXP (x, 0)) == REG
- && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
- || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
- || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
- && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
- && GET_MODE_SIZE (mode) >= 4
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && (INTVAL (XEXP (x, 1)) & 3) == 0)
- return 1;
+rtx
+thumb_legitimize_reload_address (rtx *x_p,
+ enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ rtx x = *x_p;
+
+ if (GET_CODE (x) == PLUS
+ && GET_MODE_SIZE (mode) < 4
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 0) == stack_pointer_rtx
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
+ {
+ rtx orig_x = x;
+
+ x = copy_rtx (x);
+ push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return x;
}
- else if (GET_MODE_CLASS (mode) != MODE_FLOAT
- && GET_MODE_SIZE (mode) == 4
- && GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x)
- && ! (flag_pic
- && symbol_mentioned_p (get_pool_constant (x))
- && ! pcrel_constant_p (get_pool_constant (x))))
- return 1;
+ /* If both registers are hi-regs, then it's better to reload the
+ entire expression rather than each register individually. That
+ only requires one reload register rather than two. */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && REG_P (XEXP (x, 1))
+ && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
+ && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
+ {
+ rtx orig_x = x;
+
+ x = copy_rtx (x);
+ push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return x;
+ }
+
+ return NULL;
+}
+
+/* Test for various thread-local symbols. */
+
+/* Return TRUE if X is a thread-local symbol. */
+
+static bool
+arm_tls_symbol_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
- return 0;
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+
+ return SYMBOL_REF_TLS_MODEL (x) != 0;
}
-/* Return nonzero if VAL can be used as an offset in a Thumb-state address
- instruction of mode MODE. */
-int
-thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
+/* Helper for arm_tls_referenced_p. */
+
+static int
+arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
- switch (GET_MODE_SIZE (mode))
- {
- case 1:
- return val >= 0 && val < 32;
+ if (GET_CODE (*x) == SYMBOL_REF)
+ return SYMBOL_REF_TLS_MODEL (*x) != 0;
- case 2:
- return val >= 0 && val < 64 && (val & 1) == 0;
+ /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
+ TLS offsets, not real symbol references. */
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_TLS)
+ return -1;
- default:
- return (val >= 0
- && (val + GET_MODE_SIZE (mode)) <= 128
- && (val & 3) == 0);
- }
+ return 0;
}
-/* Build the SYMBOL_REF for __tls_get_addr. */
-
-static GTY(()) rtx tls_get_addr_libfunc;
+/* Return TRUE if X contains any TLS symbol references. */
-static rtx
-get_tls_get_addr (void)
+bool
+arm_tls_referenced_p (rtx x)
{
- if (!tls_get_addr_libfunc)
- tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
- return tls_get_addr_libfunc;
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
-static rtx
-arm_load_tp (rtx target)
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
+
+bool
+arm_cannot_force_const_mem (rtx x)
{
- if (!target)
- target = gen_reg_rtx (SImode);
+ rtx base, offset;
- if (TARGET_HARD_TP)
+ if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
{
- /* Can return in any reg. */
- emit_insn (gen_load_tp_hard (target));
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return true;
}
- else
- {
- /* Always returned in r0. Immediately copy the result into a pseudo,
- otherwise other uses of r0 (e.g. setting up function arguments) may
- clobber the value. */
+ return arm_tls_referenced_p (x);
+}
+\f
+#define REG_OR_SUBREG_REG(X) \
+ (GET_CODE (X) == REG \
+ || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
- rtx tmp;
+#define REG_OR_SUBREG_RTX(X) \
+ (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
- emit_insn (gen_load_tp_soft ());
+static inline int
+thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+{
+ enum machine_mode mode = GET_MODE (x);
+ int total;
- tmp = gen_rtx_REG (SImode, 0);
- emit_move_insn (target, tmp);
- }
- return target;
-}
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ case PLUS:
+ case MINUS:
+ case COMPARE:
+ case NEG:
+ case NOT:
+ return COSTS_N_INSNS (1);
-static rtx
-load_tls_operand (rtx x, rtx reg)
-{
- rtx tmp;
+ case MULT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ int cycles = 0;
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
- if (reg == NULL_RTX)
- reg = gen_reg_rtx (SImode);
+ while (i)
+ {
+ i >>= 2;
+ cycles++;
+ }
+ return COSTS_N_INSNS (2) + cycles;
+ }
+ return COSTS_N_INSNS (1) + 16;
- tmp = gen_rtx_CONST (SImode, x);
+ case SET:
+ return (COSTS_N_INSNS (1)
+ + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ + GET_CODE (SET_DEST (x)) == MEM));
- emit_move_insn (reg, tmp);
+ case CONST_INT:
+ if (outer == SET)
+ {
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+ return 0;
+ if (thumb_shiftable_const (INTVAL (x)))
+ return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
+ }
+ else if ((outer == PLUS || outer == COMPARE)
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+ return 0;
+ else if ((outer == IOR || outer == XOR || outer == AND)
+ && INTVAL (x) < 256 && INTVAL (x) >= -256)
+ return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
+ else if (outer == ASHIFT || outer == ASHIFTRT
+ || outer == LSHIFTRT)
+ return 0;
+ return COSTS_N_INSNS (2);
- return reg;
-}
+ case CONST:
+ case CONST_DOUBLE:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return COSTS_N_INSNS (3);
-static rtx
-arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
-{
- rtx insns, label, labelno, sum;
+ case UDIV:
+ case UMOD:
+ case DIV:
+ case MOD:
+ return 100;
- start_sequence ();
+ case TRUNCATE:
+ return 99;
- labelno = GEN_INT (pic_labelno++);
- label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
- label = gen_rtx_CONST (VOIDmode, label);
+ case AND:
+ case XOR:
+ case IOR:
+ /* XXX guess. */
+ return 8;
- sum = gen_rtx_UNSPEC (Pmode,
- gen_rtvec (4, x, GEN_INT (reloc), label,
- GEN_INT (TARGET_ARM ? 8 : 4)),
- UNSPEC_TLS);
- reg = load_tls_operand (sum, reg);
+ case MEM:
+ /* XXX another guess. */
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ ? 4 : 0));
- if (TARGET_ARM)
- emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
- else if (TARGET_THUMB2)
- {
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- }
- else /* TARGET_THUMB1 */
- emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ case IF_THEN_ELSE:
+ /* XXX a guess. */
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ return 14;
+ return 2;
- *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
- Pmode, 1, reg, Pmode);
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ total = mode == DImode ? COSTS_N_INSNS (1) : 0;
+ total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
- insns = get_insns ();
- end_sequence ();
+ if (mode == SImode)
+ return total;
- return insns;
+ if (arm_arch6)
+ return total + COSTS_N_INSNS (1);
+
+ /* Assume a two-shift sequence. Increase the cost slightly so
+ we prefer actual shifts over an extend operation. */
+ return total + 1 + COSTS_N_INSNS (2);
+
+ default:
+ return 99;
+ }
}
-rtx
-legitimize_tls_address (rtx x, rtx reg)
+static inline bool
+arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
- rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
- unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+ enum machine_mode mode = GET_MODE (x);
+ enum rtx_code subcode;
+ rtx operand;
+ enum rtx_code code = GET_CODE (x);
+ *total = 0;
- switch (model)
+ switch (code)
{
- case TLS_MODEL_GLOBAL_DYNAMIC:
- insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
- dest = gen_reg_rtx (Pmode);
- emit_libcall_block (insns, dest, ret, x);
- return dest;
+ case MEM:
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
+ return true;
- case TLS_MODEL_LOCAL_DYNAMIC:
- insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ *total = COSTS_N_INSNS (2);
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = COSTS_N_INSNS (20);
+ return false;
- /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
- share the LDM result with other LD model accesses. */
- eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
- UNSPEC_TLS);
- dest = gen_reg_rtx (Pmode);
- emit_libcall_block (insns, dest, ret, eqv);
+ case ROTATE:
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = rtx_cost (XEXP (x, 1), code, speed);
+
+ /* Fall through */
+ case ROTATERT:
+ if (mode != SImode)
+ {
+ *total += COSTS_N_INSNS (4);
+ return true;
+ }
+
+ /* Fall through */
+ case ASHIFT: case LSHIFTRT: case ASHIFTRT:
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ if (mode == DImode)
+ {
+ *total += COSTS_N_INSNS (3);
+ return true;
+ }
- /* Load the addend. */
- addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
- UNSPEC_TLS);
- addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
- return gen_rtx_PLUS (Pmode, dest, addend);
+ *total += COSTS_N_INSNS (1);
+ /* Increase the cost of complex shifts because they aren't any faster,
+ and reduce dual issue opportunities. */
+ if (arm_tune_cortex_a9
+ && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
+ ++*total;
- case TLS_MODEL_INITIAL_EXEC:
- labelno = GEN_INT (pic_labelno++);
- label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
- label = gen_rtx_CONST (VOIDmode, label);
- sum = gen_rtx_UNSPEC (Pmode,
- gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
- GEN_INT (TARGET_ARM ? 8 : 4)),
- UNSPEC_TLS);
- reg = load_tls_operand (sum, reg);
+ return true;
- if (TARGET_ARM)
- emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
- else if (TARGET_THUMB2)
+ case MINUS:
+ if (mode == DImode)
{
- rtx tmp;
- /* Thumb-2 only allows very limited access to the PC. Calculate
- the address in a temporary register. */
- tmp = gen_reg_rtx (SImode);
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
- emit_insn (gen_addsi3(reg, reg, tmp));
- emit_move_insn (reg, gen_const_mem (SImode, reg));
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
}
- else
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
- emit_move_insn (reg, gen_const_mem (SImode, reg));
- }
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 0)))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- tp = arm_load_tp (NULL_RTX);
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
- return gen_rtx_PLUS (Pmode, tp, reg);
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
- case TLS_MODEL_LOCAL_EXEC:
- tp = arm_load_tp (NULL_RTX);
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- reg = gen_rtx_UNSPEC (Pmode,
- gen_rtvec (2, x, GEN_INT (TLS_LE32)),
- UNSPEC_TLS);
- reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
+ subcode = GET_CODE (XEXP (x, 1));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
- return gen_rtx_PLUS (Pmode, tp, reg);
+ /* A shift as a part of RSB costs no more than RSB itself. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- default:
- abort ();
- }
-}
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
-/* Try machine-dependent ways of modifying an illegitimate address
- to be legitimate. If we find one, return the new, valid address. */
-rtx
-arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
-{
- if (arm_tls_symbol_p (x))
- return legitimize_tls_address (x, NULL_RTX);
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
- if (GET_CODE (x) == PLUS)
- {
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
+ return true;
+ }
- if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
- xop0 = force_reg (SImode, xop0);
+ /* Fall through */
- if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
- xop1 = force_reg (SImode, xop1);
+ case PLUS:
+ if (code == PLUS && arm_arch6 && mode == SImode
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
+ speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- if (ARM_BASE_REGISTER_RTX_P (xop0)
- && GET_CODE (xop1) == CONST_INT)
+ /* MLA: All arguments must be registers. We filter out
+ multiplication by a power of two, so that we fall down into
+ the code below. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
{
- HOST_WIDE_INT n, low_n;
- rtx base_reg, val;
- n = INTVAL (xop1);
+ /* The cost comes from the cost of the multiply. */
+ return false;
+ }
- /* VFP addressing modes actually allow greater offsets, but for
- now we just stick with the lowest common denominator. */
- if (mode == DImode
- || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
{
- low_n = n & 0x0f;
- n &= ~0x0f;
- if (low_n > 4)
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
{
- n += 16;
- low_n -= 16;
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
}
- }
- else
- {
- low_n = ((mode) == TImode ? 0
- : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
- n -= low_n;
+
+ return false;
}
- base_reg = gen_reg_rtx (SImode);
- val = force_operand (plus_constant (xop0, n), NULL_RTX);
- emit_move_insn (base_reg, val);
- x = plus_constant (base_reg, low_n);
+ *total = COSTS_N_INSNS (20);
+ return false;
}
- else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
- x = gen_rtx_PLUS (SImode, xop0, xop1);
- }
- /* XXX We don't allow MINUS any more -- see comment in
- arm_legitimate_address_p (). */
- else if (GET_CODE (x) == MINUS)
- {
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+ return true;
+ }
- if (CONSTANT_P (xop0))
- xop0 = force_reg (SImode, xop0);
+ /* Fall through */
- if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
- xop1 = force_reg (SImode, xop1);
+ case AND: case XOR: case IOR:
- if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
- x = gen_rtx_MINUS (SImode, xop0, xop1);
- }
+ /* Normally the frame registers will be spilt into reg+const during
+ reload, so it is a bad idea to combine them with other instructions,
+ since then they might not be moved outside of loops. As a compromise
+ we allow integration with ops that have a constant as their second
+ operand. */
+ if (REG_OR_SUBREG_REG (XEXP (x, 0))
+ && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
+ && GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = COSTS_N_INSNS (1);
- /* Make sure to take full advantage of the pre-indexed addressing mode
- with absolute addresses which often allows for the base register to
- be factorized for multiple adjacent memory references, and it might
- even allows for the mini pool to be avoided entirely. */
- else if (GET_CODE (x) == CONST_INT && optimize > 0)
- {
- unsigned int bits;
- HOST_WIDE_INT mask, base, index;
- rtx base_reg;
+ if (mode == DImode)
+ {
+ *total += COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
- /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
- use a 8-bit index. So let's use a 12-bit index for SImode only and
- hope that arm_gen_constant will enable ldrb to use more bits. */
- bits = (mode == SImode) ? 12 : 8;
- mask = (1 << bits) - 1;
- base = INTVAL (x) & ~mask;
- index = INTVAL (x) & mask;
- if (bit_count (base & 0xffffffff) > (32 - bits)/2)
- {
- /* It'll most probably be more efficient to generate the base
- with more bits set and use a negative index instead. */
- base |= mask;
- index -= mask;
+ return false;
}
- base_reg = force_reg (SImode, GEN_INT (base));
- x = plus_constant (base_reg, index);
- }
-
- if (flag_pic)
- {
- /* We need to find and carefully transform any SYMBOL and LABEL
- references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
- if (new_x != orig_x)
- x = new_x;
- }
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
- return x;
-}
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+ if (subcode == UMIN || subcode == UMAX
+ || subcode == SMIN || subcode == SMAX)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
-/* Try machine-dependent ways of modifying an illegitimate Thumb address
- to be legitimate. If we find one, return the new, valid address. */
-rtx
-thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
-{
- if (arm_tls_symbol_p (x))
- return legitimize_tls_address (x, NULL_RTX);
+ return false;
- if (GET_CODE (x) == PLUS
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
- || INTVAL (XEXP (x, 1)) < 0))
- {
- rtx xop0 = XEXP (x, 0);
- rtx xop1 = XEXP (x, 1);
- HOST_WIDE_INT offset = INTVAL (xop1);
+ case MULT:
+ /* This should have been handled by the CPU specific routines. */
+ gcc_unreachable ();
- /* Try and fold the offset into a biasing of the base register and
- then offsetting that. Don't do this when optimizing for space
- since it can cause too many CSEs. */
- if (optimize_size && offset >= 0
- && offset < 256 + 31 * GET_MODE_SIZE (mode))
+ case TRUNCATE:
+ if (arm_arch3m && mode == SImode
+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
+ == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
+ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
{
- HOST_WIDE_INT delta;
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
+ return false;
- if (offset >= 256)
- delta = offset - (256 - GET_MODE_SIZE (mode));
- else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
- delta = 31 * GET_MODE_SIZE (mode);
- else
- delta = offset & (~31 * GET_MODE_SIZE (mode));
+ case NEG:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
- xop0 = force_operand (plus_constant (xop0, offset - delta),
- NULL_RTX);
- x = plus_constant (xop0, delta);
+ /* Fall through */
+ case NOT:
+ *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
+ if (mode == SImode && code == NOT)
+ {
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT
+ || (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ /* Register shifts cost an extra cycle. */
+ if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
+ subcode, speed);
+ return true;
+ }
}
- else if (offset < 0 && offset > -256)
- /* Small negative offsets are best done with a subtract before the
- dereference, forcing these into a register normally takes two
- instructions. */
- x = force_operand (x, NULL_RTX);
- else
+
+ return false;
+
+ case IF_THEN_ELSE:
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
{
- /* For the remaining cases, force the constant into a register. */
- xop1 = force_reg (SImode, xop1);
- x = gen_rtx_PLUS (SImode, xop0, xop1);
+ *total = COSTS_N_INSNS (4);
+ return true;
}
- }
- else if (GET_CODE (x) == PLUS
- && s_register_operand (XEXP (x, 1), SImode)
- && !s_register_operand (XEXP (x, 0), SImode))
- {
- rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
- x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
- }
+ operand = XEXP (x, 0);
- if (flag_pic)
- {
- /* We need to find and carefully transform any SYMBOL and LABEL
- references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
+ && GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == CC_REGNUM))
+ *total += COSTS_N_INSNS (1);
+ *total += (rtx_cost (XEXP (x, 1), code, speed)
+ + rtx_cost (XEXP (x, 2), code, speed));
+ return true;
- if (new_x != orig_x)
- x = new_x;
- }
+ case NE:
+ if (mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
- return x;
-}
+ case GE:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
-rtx
-thumb_legitimize_reload_address (rtx *x_p,
- enum machine_mode mode,
- int opnum, int type,
- int ind_levels ATTRIBUTE_UNUSED)
-{
- rtx x = *x_p;
+ case LT:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
- if (GET_CODE (x) == PLUS
- && GET_MODE_SIZE (mode) < 4
- && REG_P (XEXP (x, 0))
- && XEXP (x, 0) == stack_pointer_rtx
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
- {
- rtx orig_x = x;
+ case EQ:
+ case GT:
+ case LE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
+ scc_insn:
+ /* SCC insns. In the case where the comparison has already been
+ performed, then they cost 2 instructions. Otherwise they need
+ an additional comparison before them. */
+ *total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ return true;
+ }
- x = copy_rtx (x);
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
- Pmode, VOIDmode, 0, 0, opnum, type);
- return x;
- }
+ /* Fall through */
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ *total = 0;
+ return true;
+ }
- /* If both registers are hi-regs, then it's better to reload the
- entire expression rather than each register individually. That
- only requires one reload register rather than two. */
- if (GET_CODE (x) == PLUS
- && REG_P (XEXP (x, 0))
- && REG_P (XEXP (x, 1))
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
- {
- rtx orig_x = x;
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
- x = copy_rtx (x);
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
- Pmode, VOIDmode, 0, 0, opnum, type);
- return x;
- }
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
- return NULL;
-}
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ return false;
-/* Test for various thread-local symbols. */
+ case UMIN:
+ case UMAX:
+ case SMIN:
+ case SMAX:
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
-/* Return TRUE if X is a thread-local symbol. */
+ case ABS:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+ *total = COSTS_N_INSNS (1);
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (3);
+ return false;
-static bool
-arm_tls_symbol_p (rtx x)
-{
- if (! TARGET_HAVE_TLS)
- return false;
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ *total = 0;
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ rtx op = XEXP (x, 0);
+ enum machine_mode opmode = GET_MODE (op);
- if (GET_CODE (x) != SYMBOL_REF)
- return false;
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
- return SYMBOL_REF_TLS_MODEL (x) != 0;
-}
+ if (opmode != SImode)
+ {
+ if (MEM_P (op))
+ {
+ /* If !arm_arch4, we use one of the extendhisi2_mem
+ or movhi_bytes patterns for HImode. For a QImode
+ sign extension, we first zero-extend from memory
+ and then perform a shift sequence. */
+ if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
+ *total += COSTS_N_INSNS (2);
+ }
+ else if (arm_arch6)
+ *total += COSTS_N_INSNS (1);
+
+ /* We don't have the necessary insn, so we need to perform some
+ other operation. */
+ else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
+ /* An and with constant 255. */
+ *total += COSTS_N_INSNS (1);
+ else
+ /* A shift sequence. Increase costs slightly to avoid
+ combining two shifts into an extend operation. */
+ *total += COSTS_N_INSNS (2) + 1;
+ }
-/* Helper for arm_tls_referenced_p. */
+ return false;
+ }
-static int
-arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
-{
- if (GET_CODE (*x) == SYMBOL_REF)
- return SYMBOL_REF_TLS_MODEL (*x) != 0;
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V4QImode:
+ case V2HImode:
+ *total = COSTS_N_INSNS (1);
+ return false;
- /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
- TLS offsets, not real symbol references. */
- if (GET_CODE (*x) == UNSPEC
- && XINT (*x, 1) == UNSPEC_TLS)
- return -1;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
- return 0;
-}
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
-/* Return TRUE if X contains any TLS symbol references. */
+ case CONST_INT:
+ if (const_ok_for_arm (INTVAL (x))
+ || const_ok_for_arm (~INTVAL (x)))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
+ INTVAL (x), NULL_RTX,
+ NULL_RTX, 0, 0));
+ return true;
-bool
-arm_tls_referenced_p (rtx x)
-{
- if (! TARGET_HAVE_TLS)
- return false;
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (3);
+ return true;
- return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
-}
+ case HIGH:
+ *total = COSTS_N_INSNS (1);
+ return true;
-/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
+ case LO_SUM:
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
-bool
-arm_cannot_force_const_mem (rtx x)
-{
- rtx base, offset;
+ case CONST_DOUBLE:
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
- if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
- {
- split_const (x, &base, &offset);
- if (GET_CODE (base) == SYMBOL_REF
- && !offset_within_block_p (base, INTVAL (offset)))
- return true;
+ default:
+ *total = COSTS_N_INSNS (4);
+ return false;
}
- return arm_tls_referenced_p (x);
}
-\f
-#define REG_OR_SUBREG_REG(X) \
- (GET_CODE (X) == REG \
- || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
-#define REG_OR_SUBREG_RTX(X) \
- (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
-
-#ifndef COSTS_N_INSNS
-#define COSTS_N_INSNS(N) ((N) * 4 - 2)
-#endif
+/* Estimates the size cost of thumb1 instructions.
+ For now most of the code is copied from thumb1_rtx_costs. We need more
+ fine grain tuning when we have more related test cases. */
static inline int
-thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
enum machine_mode mode = GET_MODE (x);
case MULT:
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- int cycles = 0;
- unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
-
- while (i)
- {
- i >>= 2;
- cycles++;
- }
- return COSTS_N_INSNS (2) + cycles;
- }
- return COSTS_N_INSNS (1) + 16;
+ {
+ /* Thumb1 mul instruction can't operate on const. We must Load it
+ into a register first. */
+ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
+ return COSTS_N_INSNS (1) + const_size;
+ }
+ return COSTS_N_INSNS (1);
case SET:
return (COSTS_N_INSNS (1)
- + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
- + GET_CODE (SET_DEST (x)) == MEM));
-
- case CONST_INT:
- if (outer == SET)
- {
- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
- return 0;
- if (thumb_shiftable_const (INTVAL (x)))
- return COSTS_N_INSNS (2);
- return COSTS_N_INSNS (3);
- }
+ + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ + GET_CODE (SET_DEST (x)) == MEM));
+
+ case CONST_INT:
+ if (outer == SET)
+ {
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+ return COSTS_N_INSNS (1);
+ /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
+ if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
+ return COSTS_N_INSNS (2);
+ /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
+ if (thumb_shiftable_const (INTVAL (x)))
+ return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
+ }
else if ((outer == PLUS || outer == COMPARE)
- && INTVAL (x) < 256 && INTVAL (x) > -256)
- return 0;
- else if (outer == AND
- && INTVAL (x) < 256 && INTVAL (x) >= -256)
- return COSTS_N_INSNS (1);
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+ return 0;
+ else if ((outer == IOR || outer == XOR || outer == AND)
+ && INTVAL (x) < 256 && INTVAL (x) >= -256)
+ return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
else if (outer == ASHIFT || outer == ASHIFTRT
- || outer == LSHIFTRT)
- return 0;
+ || outer == LSHIFTRT)
+ return 0;
return COSTS_N_INSNS (2);
case CONST:
case MEM:
/* XXX another guess. */
/* Memory costs quite a lot for the first word, but subsequent words
- load at the equivalent of a single insn each. */
+ load at the equivalent of a single insn each. */
return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
- + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
- ? 4 : 0));
+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ ? 4 : 0));
case IF_THEN_ELSE:
/* XXX a guess. */
if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
- return 14;
+ return 14;
return 2;
case ZERO_EXTEND:
/* XXX still guessing. */
switch (GET_MODE (XEXP (x, 0)))
- {
- case QImode:
- return (1 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- case HImode:
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- case SImode:
- return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- default:
- return 99;
- }
-
- default:
- return 99;
- }
-}
-
-
-/* Worker routine for arm_rtx_costs. */
-/* ??? This needs updating for thumb2. */
-static inline int
-arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
-{
- enum machine_mode mode = GET_MODE (x);
- enum rtx_code subcode;
- int extra_cost;
-
- switch (code)
- {
- case MEM:
- /* Memory costs quite a lot for the first word, but subsequent words
- load at the equivalent of a single insn each. */
- return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
- + (GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
-
- case DIV:
- case MOD:
- case UDIV:
- case UMOD:
- return optimize_size ? COSTS_N_INSNS (2) : 100;
-
- case ROTATE:
- if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
- return 4;
- /* Fall through */
- case ROTATERT:
- if (mode != SImode)
- return 8;
- /* Fall through */
- case ASHIFT: case LSHIFTRT: case ASHIFTRT:
- if (mode == DImode)
- return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
- + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 8));
- return (1 + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 4)
- + ((GET_CODE (XEXP (x, 1)) == REG
- || (GET_CODE (XEXP (x, 1)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
- || (GET_CODE (XEXP (x, 1)) == CONST_INT))
- ? 0 : 4));
-
- case MINUS:
- if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
- {
- extra_cost = rtx_cost (XEXP (x, 1), code);
- if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
- }
-
- if (mode == DImode)
- return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
- ? 0 : 8));
-
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 0))))
- ? 0 : 8));
-
- if (((GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))
- && REG_OR_SUBREG_REG (XEXP (x, 1))))
- || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
- || subcode == ASHIFTRT || subcode == LSHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
- (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
- && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
- || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
- && REG_OR_SUBREG_REG (XEXP (x, 0))))
- return 1;
- /* Fall through */
-
- case PLUS:
- if (arm_arch6 && mode == SImode
- && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
- return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
- + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
-
- if (GET_CODE (XEXP (x, 0)) == MULT)
- {
- extra_cost = rtx_cost (XEXP (x, 0), code);
- if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
- }
-
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8));
-
- /* Fall through */
- case AND: case XOR: case IOR:
- extra_cost = 0;
-
- /* Normally the frame registers will be spilt into reg+const during
- reload, so it is a bad idea to combine them with other instructions,
- since then they might not be moved outside of loops. As a compromise
- we allow integration with ops that have a constant as their second
- operand. */
- if ((REG_OR_SUBREG_REG (XEXP (x, 0))
- && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
- && GET_CODE (XEXP (x, 1)) != CONST_INT)
- || (REG_OR_SUBREG_REG (XEXP (x, 0))
- && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
- extra_cost = 4;
-
- if (mode == DImode)
- return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 8));
-
- if (REG_OR_SUBREG_REG (XEXP (x, 0)))
- return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 4));
-
- else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
- return (1 + extra_cost
- + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
- || subcode == LSHIFTRT || subcode == ASHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
- && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
- || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
- ? 0 : 4));
-
- return 8;
-
- case MULT:
- /* This should have been handled by the CPU specific routines. */
- gcc_unreachable ();
-
- case TRUNCATE:
- if (arm_arch3m && mode == SImode
- && GET_CODE (XEXP (x, 0)) == LSHIFTRT
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
- && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
- == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
- && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
- || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
- return 8;
- return 99;
-
- case NEG:
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
- /* Fall through */
- case NOT:
- if (mode == DImode)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
-
- return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
-
- case IF_THEN_ELSE:
- if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
- return 14;
- return 2;
-
- case COMPARE:
- return 1;
-
- case ABS:
- return 4 + (mode == DImode ? 4 : 0);
-
- case SIGN_EXTEND:
- if (arm_arch_thumb2 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
-
- if (GET_MODE (XEXP (x, 0)) == QImode)
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
- /* Fall through */
- case ZERO_EXTEND:
- if (arm_arch6 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
-
- switch (GET_MODE (XEXP (x, 0)))
- {
- case QImode:
- return (1 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- case HImode:
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- case SImode:
- return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
-
- case V8QImode:
- case V4HImode:
- case V2SImode:
- case V4QImode:
- case V2HImode:
- return 1;
-
- default:
- gcc_unreachable ();
- }
- gcc_unreachable ();
+ {
+ case QImode:
+ return (1 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
- case CONST_INT:
- if (const_ok_for_arm (INTVAL (x)))
- return outer == SET ? 2 : -1;
- else if (outer == AND
- && const_ok_for_arm (~INTVAL (x)))
- return -1;
- else if ((outer == COMPARE
- || outer == PLUS || outer == MINUS)
- && const_ok_for_arm (-INTVAL (x)))
- return -1;
- else
- return 5;
+ case HImode:
+ return (4 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
- case CONST:
- case LABEL_REF:
- case SYMBOL_REF:
- return 6;
+ case SImode:
+ return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
- case CONST_DOUBLE:
- if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
- return outer == SET ? 2 : -1;
- else if ((outer == COMPARE || outer == PLUS)
- && neg_const_double_rtx_ok_for_fpa (x))
- return -1;
- return 7;
+ default:
+ return 99;
+ }
default:
return 99;
/* RTX costs when optimizing for size. */
static bool
-arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total)
{
enum machine_mode mode = GET_MODE (x);
-
if (TARGET_THUMB1)
{
- /* XXX TBD. For now, use the standard costs. */
- *total = thumb1_rtx_costs (x, code, outer_code);
+ *total = thumb1_size_rtx_costs (x, code, outer_code);
return true;
}
a single register, otherwise it costs one insn per word. */
if (REG_P (XEXP (x, 0)))
*total = COSTS_N_INSNS (1);
+ else if (flag_pic
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
+ /* This will be split into two instructions.
+ See arm.md:calculate_pic_address. */
+ *total = COSTS_N_INSNS (2);
else
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
return true;
case ROTATE:
if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
{
- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
/* Fall through */
case ASHIFTRT:
if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
return true;
}
else if (mode == SImode)
{
- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
/* Slightly disparage register shifts, but not by much. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- *total += 1 + rtx_cost (XEXP (x, 1), code);
+ *total += 1 + rtx_cost (XEXP (x, 1), code, false);
return true;
}
return false;
case MINUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
return false;
case PLUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
{
*total = COSTS_N_INSNS (1);
return false;
}
+ /* A shift as a part of ADD costs nothing. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
+ *total += rtx_cost (XEXP (x, 1), code, false);
+ return true;
+ }
+
/* Fall through */
case AND: case XOR: case IOR:
if (mode == SImode)
return false;
case NEG:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
- *total = COSTS_N_INSNS (1);
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+
/* Fall through */
case NOT:
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
return false;
case ABS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
return false;
case SIGN_EXTEND:
- *total = 0;
- if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
- {
- if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
- *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
- }
- if (mode == DImode)
- *total += COSTS_N_INSNS (1);
- return false;
-
case ZERO_EXTEND:
- *total = 0;
- if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
- {
- switch (GET_MODE (XEXP (x, 0)))
- {
- case QImode:
- *total += COSTS_N_INSNS (1);
- break;
-
- case HImode:
- *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
-
- case SImode:
- break;
-
- default:
- *total += COSTS_N_INSNS (2);
- }
- }
-
- if (mode == DImode)
- *total += COSTS_N_INSNS (1);
-
- return false;
+ return arm_rtx_costs_1 (x, outer_code, total, 0);
case CONST_INT:
if (const_ok_for_arm (INTVAL (x)))
- *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
+ /* A multiplication by a constant requires another instruction
+ to load the constant to a register. */
+ *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
+ ? 1 : 0);
else if (const_ok_for_arm (~INTVAL (x)))
*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
else if (const_ok_for_arm (-INTVAL (x)))
*total = COSTS_N_INSNS (4);
return true;
+ case HIGH:
+ case LO_SUM:
+ /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
+ cost of these slightly. */
+ *total = COSTS_N_INSNS (1) + 1;
+ return true;
+
default:
if (mode != VOIDmode)
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
}
}
+/* RTX costs when optimizing for size. */
+static bool
+arm_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ if (!speed)
+ return arm_size_rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code, total);
+ else
+ return current_tune->rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code,
+ total, speed);
+}
+
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
static bool
-arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
+ *total = COSTS_N_INSNS (cost);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
- *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);;
}
}
/* RTX cost for cores with a fast multiply unit (M variants). */
static bool
-arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS(2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS(cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ /* Requires a lib call */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
so it can be ignored. */
static bool
-arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
switch (code)
{
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) != MULT)
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+
+ /* A COMPARE of a MULT is slow on XScale; the muls instruction
+ will stall until the multiplication is complete. */
+ *total = COSTS_N_INSNS (3);
+ return false;
+
case MULT:
/* There is no point basing this on the tuning, since it is always the
fast variant if it exists at all. */
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
- & (unsigned HOST_WIDE_INT) 0xffffffff);
- int cost, const_ok = const_ok_for_arm (i);
+ /* If operand 1 is a constant we can more accurately
+ calculate the cost of the multiply. The multiplier can
+ retire 15 bits on the first cycle and a further 12 on the
+ second. We do, of course, have to load the constant into
+ a register first. */
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+ /* There's a general overhead of one cycle. */
+ int cost = 1;
unsigned HOST_WIDE_INT masked_const;
- /* The cost will be related to two insns.
- First a load of the constant (MOV or LDR), then a multiply. */
- cost = 2;
- if (! const_ok)
- cost += 1; /* LDR is probably more expensive because
- of longer result latency. */
+ if (i & 0x80000000)
+ i = ~i;
+
+ i &= (unsigned HOST_WIDE_INT) 0xffffffff;
+
masked_const = i & 0xffff8000;
- if (masked_const != 0 && masked_const != 0xffff8000)
+ if (masked_const != 0)
{
+ cost++;
masked_const = i & 0xf8000000;
- if (masked_const == 0 || masked_const == 0xf8000000)
- cost += 1;
- else
- cost += 2;
+ if (masked_const != 0)
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS (cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
-
- case COMPARE:
- /* A COMPARE of a MULT is slow on XScale; the muls instruction
- will stall until the multiplication is complete. */
- if (GET_CODE (XEXP (x, 0)) == MULT)
- *total = 4 + rtx_cost (XEXP (x, 0), code);
- else
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (3);
+ return false;
+ }
+
+ /* Requires a lib call */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* RTX costs for 9e (and later) cores. */
static bool
-arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
- int nonreg_cost;
- int cost;
if (TARGET_THUMB1)
{
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 3;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- {
- *total = 30;
- return true;
- }
if (mode == DImode)
{
- cost = 7;
- nonreg_cost = 8;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
- else
+
+ if (mode == SImode)
{
- cost = 2;
- nonreg_cost = 4;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
- *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* All address computations that can be done are free, but rtx cost returns
if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
return 10;
- if (c == PLUS || c == MINUS)
+ if (c == PLUS)
{
- if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
return 2;
if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
}
static int
-arm_address_cost (rtx x)
+arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
-static int
-arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
+/* Adjust cost hook for XScale. */
+static bool
+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
- rtx i_pat, d_pat;
-
/* Some true dependencies can have a higher cost depending
on precisely how certain input operands are used. */
- if (arm_tune_xscale
- && REG_NOTE_KIND (link) == 0
+ if (REG_NOTE_KIND(link) == 0
&& recog_memoized (insn) >= 0
&& recog_memoized (dep) >= 0)
{
if (reg_overlap_mentioned_p (recog_data.operand[opno],
shifted_operand))
- return 2;
+ {
+ *cost = 2;
+ return false;
+ }
}
}
}
+ return true;
+}
+
+/* Adjust cost hook for Cortex A9. */
+static bool
+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+ switch (REG_NOTE_KIND (link))
+ {
+ case REG_DEP_ANTI:
+ *cost = 0;
+ return false;
+
+ case REG_DEP_TRUE:
+ case REG_DEP_OUTPUT:
+ if (recog_memoized (insn) >= 0
+ && recog_memoized (dep) >= 0)
+ {
+ if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ if (GET_MODE_CLASS
+ (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
+ || GET_MODE_CLASS
+ (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
+ {
+ enum attr_type attr_type_insn = get_attr_type (insn);
+ enum attr_type attr_type_dep = get_attr_type (dep);
+
+ /* By default all dependencies of the form
+ s0 = s0 <op> s1
+ s0 = s0 <op> s2
+ have an extra latency of 1 cycle because
+ of the input and output dependency in this
+ case. However this gets modeled as an true
+ dependency and hence all these checks. */
+ if (REG_P (SET_DEST (PATTERN (insn)))
+ && REG_P (SET_DEST (PATTERN (dep)))
+ && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
+ SET_DEST (PATTERN (dep))))
+ {
+ /* FMACS is a special case where the dependant
+ instruction can be issued 3 cycles before
+ the normal latency in case of an output
+ dependency. */
+ if ((attr_type_insn == TYPE_FMACS
+ || attr_type_insn == TYPE_FMACD)
+ && (attr_type_dep == TYPE_FMACS
+ || attr_type_dep == TYPE_FMACD))
+ {
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ *cost = insn_default_latency (dep) - 3;
+ else
+ *cost = insn_default_latency (dep);
+ return false;
+ }
+ else
+ {
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ *cost = insn_default_latency (dep) + 1;
+ else
+ *cost = insn_default_latency (dep);
+ }
+ return false;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return true;
+}
+
+/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
+ It corrects the value of COST based on the relationship between
+ INSN and DEP through the dependence LINK. It returns the new
+ value. There is a per-core adjust_cost hook to adjust scheduler costs
+ and the per-core hook can choose to completely override the generic
+ adjust_cost function. Only put bits of code into arm_adjust_cost that
+ are common across all cores. */
+static int
+arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
+{
+ rtx i_pat, d_pat;
+
+ /* When generating Thumb-1 code, we want to place flag-setting operations
+ close to a conditional branch which depends on them, so that we can
+ omit the comparison. */
+ if (TARGET_THUMB1
+ && REG_NOTE_KIND (link) == 0
+ && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
+ && recog_memoized (dep) >= 0
+ && get_attr_conds (dep) == CONDS_SET)
+ return 0;
+
+ if (current_tune->sched_adjust_cost != NULL)
+ {
+ if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
+ return cost;
+ }
/* XXX This is not strictly true for the FPA. */
if (REG_NOTE_KIND (link) == REG_DEP_ANTI
constant pool are cached, and that others will miss. This is a
hack. */
- if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
+ if ((GET_CODE (src_mem) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (src_mem))
|| reg_mentioned_p (stack_pointer_rtx, src_mem)
|| reg_mentioned_p (frame_pointer_rtx, src_mem)
|| reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
init_fp_table ();
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- r = REAL_VALUE_NEGATE (r);
+ r = real_value_negate (&r);
if (REAL_VALUE_MINUS_ZERO (r))
return 0;
/* Extract sign, exponent and mantissa. */
sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
- r = REAL_VALUE_ABS (r);
+ r = real_value_abs (&r);
exponent = REAL_EXP (&r);
/* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
highest (sign) bit, with a fixed binary point at bit point_pos.
break; \
}
- unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
+ unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
unsigned char bytes[16];
int immtype = -1, matches;
}
}
-/* Initialize a vector with non-constant elements. FIXME: We can do better
- than the current implementation (building a vector on the stack and then
- loading it) in many cases. See rs6000.c. */
+/* If VALS is a vector constant that can be loaded into a register
+ using VDUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX. */
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+ return NULL_RTX;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ /* The elements are not all the same. We could handle repeating
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using
+ vdup.i16). */
+ return NULL_RTX;
+
+ /* We can load this constant by using VDUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ return gen_rtx_VEC_DUPLICATE (mode, x);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that can not be converted into a CONST_VECTOR. */
+
+rtx
+neon_make_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ rtx target;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
+
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
+ {
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ }
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */
+ return const_vec;
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
+ pipeline cycle; creating the constant takes one or two ARM
+ pipeline cycles. */
+ return target;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. On Cortex-A8 this takes two cycles
+ (for either double or quad vectors). We can not take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing
+ mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We can not construct an initializer. */
+ return NULL_RTX;
+}
+
+/* Initialize vector TARGET to VALS. */
void
neon_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner = GET_MODE_INNER (mode);
- unsigned int i, n_elts = GET_MODE_NUNITS (mode);
- rtx mem;
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ rtx constant = neon_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_DUPLICATE (mode, x)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtx index = GEN_INT (one_var);
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ neon_expand_vector_init (target, copy);
- gcc_assert (VECTOR_MODE_P (mode));
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ switch (mode)
+ {
+ case V8QImode:
+ emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
+ break;
+ case V16QImode:
+ emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
+ break;
+ case V4HImode:
+ emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
+ break;
+ case V8HImode:
+ emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
+ break;
+ case V2SImode:
+ emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
+ break;
+ case V4SImode:
+ emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
+ break;
+ case V2SFmode:
+ emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
+ break;
+ case V4SFmode:
+ emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
+ break;
+ case V2DImode:
+ emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
- XVECEXP (vals, 0, i));
-
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
}
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
- to/from. If CORE is true, we're moving from ARM registers not Neon
- registers. */
+ to/from. TYPE is one of the following values:
+ 0 - Vector load/stor (vldr)
+ 1 - Core registers (ldm)
+ 2 - Element/structure loads (vld1)
+ */
int
-neon_vector_mem_operand (rtx op, bool core)
+neon_vector_mem_operand (rtx op, int type)
{
rtx ind;
return arm_address_register_rtx_p (ind, 0);
/* Allow post-increment with Neon registers. */
- if (!core && GET_CODE (ind) == POST_INC)
+ if ((type != 1 && GET_CODE (ind) == POST_INC)
+ || (type == 0 && GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
-#if 0
- /* FIXME: We can support this too if we use VLD1/VST1. */
- if (!core
- && GET_CODE (ind) == POST_MODIFY
- && arm_address_register_rtx_p (XEXP (ind, 0), 0)
- && GET_CODE (XEXP (ind, 1)) == PLUS
- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
- ind = XEXP (ind, 1);
-#endif
+ /* FIXME: vld1 allows register post-modify. */
/* Match:
(plus (reg)
(const)). */
- if (!core
+ if (type == 0
&& GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
+ if (mode == HFmode)
+ {
+ if (!TARGET_NEON_FP16)
+ return GENERAL_REGS;
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
+ return NO_REGS;
+ return GENERAL_REGS;
+ }
+
if (TARGET_NEON
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
- && neon_vector_mem_operand (x, FALSE))
+ && neon_vector_mem_operand (x, 0))
return NO_REGS;
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
}
}
-/* Must not copy a SET whose source operand is PC-relative. */
+/* Must not copy any rtx that uses a pc-relative address. */
+
+static int
+arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_PIC_BASE)
+ return 1;
+ return 0;
+}
static bool
arm_cannot_copy_insn_p (rtx insn)
{
- rtx pat = PATTERN (insn);
-
- if (GET_CODE (pat) == SET)
- {
- rtx rhs = SET_SRC (pat);
-
- if (GET_CODE (rhs) == UNSPEC
- && XINT (rhs, 1) == UNSPEC_PIC_BASE)
- return TRUE;
-
- if (GET_CODE (rhs) == MEM
- && GET_CODE (XEXP (rhs, 0)) == UNSPEC
- && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
- return TRUE;
- }
-
- return FALSE;
+ return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
enum rtx_code
/* Don't accept any offset that will require multiple
instructions to handle, since this would cause the
arith_adjacentmem pattern to output an overlong sequence. */
- if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
+ if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
return 0;
/* Don't allow an eliminable register: register elimination can make
return 0;
}
-int
-load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
- HOST_WIDE_INT *load_offset)
+/* Return true iff it would be profitable to turn a sequence of NOPS loads
+ or stores (depending on IS_STORE) into a load-multiple or store-multiple
+ instruction. ADD_OFFSET is nonzero if the base address register needs
+ to be modified with an add instruction before we can use it. */
+
+static bool
+multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
+ int nops, HOST_WIDE_INT add_offset)
+ {
+ /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
+ if the offset isn't small enough. The reason 2 ldrs are faster
+ is because these ARMs are able to do more than one cache access
+ in a single cycle. The ARM9 and StrongARM have Harvard caches,
+ whilst the ARM8 has a double bandwidth cache. This means that
+ these cores can do both an instruction fetch and a data fetch in
+ a single cycle, so the trick of calculating the address into a
+ scratch register (one of the result regs) and then doing a load
+ multiple actually becomes slower (and no smaller in code size).
+ That is the transformation
+
+ ldr rd1, [rbase + offset]
+ ldr rd2, [rbase + offset + 4]
+
+ to
+
+ add rd1, rbase, offset
+ ldmia rd1, {rd1, rd2}
+
+ produces worse code -- '3 cycles + any stalls on rd2' instead of
+ '2 cycles + any stalls on rd2'. On ARMs with only one cache
+ access per cycle, the first sequence could never complete in less
+ than 6 cycles, whereas the ldm sequence would only take 5 and
+ would make better use of sequential accesses if not hitting the
+ cache.
+
+ We cheat here and test 'arm_ld_sched' which we currently know to
+ only be true for the ARM8, ARM9 and StrongARM. If this ever
+ changes, then the test below needs to be reworked. */
+ if (nops == 2 && arm_ld_sched && add_offset != 0)
+ return false;
+
+ /* XScale has load-store double instructions, but they have stricter
+ alignment requirements than load-store multiple, so we cannot
+ use them.
+
+ For XScale ldm requires 2 + NREGS cycles to complete and blocks
+ the pipeline until completion.
+
+ NREGS CYCLES
+ 1 3
+ 2 4
+ 3 5
+ 4 6
+
+ An ldr instruction takes 1-3 cycles, but does not block the
+ pipeline.
+
+ NREGS CYCLES
+ 1 1-3
+ 2 2-6
+ 3 3-9
+ 4 4-12
+
+ Best case ldr will always win. However, the more ldr instructions
+ we issue, the less likely we are to be able to schedule them well.
+ Using ldr instructions also increases code size.
+
+ As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+ for counts of 3 or 4 regs. */
+ if (nops <= 2 && arm_tune_xscale && !optimize_size)
+ return false;
+ return true;
+}
+
+/* Subroutine of load_multiple_sequence and store_multiple_sequence.
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
+ an array ORDER which describes the sequence to use when accessing the
+ offsets that produces an ascending order. In this sequence, each
+ offset must be larger by exactly 4 than the previous one. ORDER[0]
+ must have been filled in with the lowest offset by the caller.
+ If UNSORTED_REGS is nonnull, it is an array of register numbers that
+ we use to verify that ORDER produces an ascending order of registers.
+ Return true if it was possible to construct such an order, false if
+ not. */
+
+static bool
+compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
+ int *unsorted_regs)
+{
+ int i;
+ for (i = 1; i < nops; i++)
+ {
+ int j;
+
+ order[i] = order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
+ {
+ /* We must find exactly one offset that is higher than the
+ previous one by 4. */
+ if (order[i] != order[i - 1])
+ return false;
+ order[i] = j;
+ }
+ if (order[i] == order[i - 1])
+ return false;
+ /* The register numbers must be ascending. */
+ if (unsorted_regs != NULL
+ && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
+ return false;
+ }
+ return true;
+}
+
+/* Used to determine in a peephole whether a sequence of load
+ instructions can be changed into a load-multiple instruction.
+ NOPS is the number of separate load instructions we are examining. The
+ first NOPS entries in OPERANDS are the destination registers, the
+ next NOPS entries are memory operands. If this function is
+ successful, *BASE is set to the common base register of the memory
+ accesses; *LOAD_OFFSET is set to the first memory location's offset
+ from that base register.
+ REGS is an array filled in with the destination register numbers.
+ SAVED_ORDER (if nonnull), is an array filled in with an order that maps
+ insn numbers to to an ascending order of stores. If CHECK_REGS is true,
+ the sequence of registers in REGS matches the loads from ascending memory
+ locations, and the function verifies that the register numbers are
+ themselves ascending. If CHECK_REGS is false, the register numbers
+ are stored in the order they are found in the operands. */
+static int
+load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
+ int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
+ rtx base_reg_rtx = NULL;
int base_reg = -1;
- int i;
+ int i, ldm_case;
+
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
- /* Can only handle 2, 3, or 4 insns at present,
- though could be easily extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
if (i == 0)
{
base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
- }
- else
- {
- if (base_reg != (int) REGNO (reg))
- /* Not addressed from the same base register. */
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
}
+ else if (base_reg != (int) REGNO (reg))
+ /* Not addressed from the same base register. */
+ return 0;
+
+ unsorted_regs[i] = (GET_CODE (operands[i]) == REG
+ ? REGNO (operands[i])
+ : REGNO (SUBREG_REG (operands[i])));
/* If it isn't an integer register, or if it overwrites the
base register but isn't the last insn in the list, then
we can't do this. */
- if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ || unsorted_regs[i] > 14
|| (i != nops - 1 && unsorted_regs[i] == base_reg))
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
-
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
-
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
if (base)
{
*base = base_reg;
for (i = 0; i < nops; i++)
- regs[i] = unsorted_regs[order[i]];
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
*load_offset = unsorted_offsets[order[0]];
}
- if (unsorted_offsets[order[0]] == 0)
- return 1; /* ldmia */
-
- if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
- return 2; /* ldmib */
-
- if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* ldmda */
-
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* ldmdb */
-
- /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
- if the offset isn't small enough. The reason 2 ldrs are faster
- is because these ARMs are able to do more than one cache access
- in a single cycle. The ARM9 and StrongARM have Harvard caches,
- whilst the ARM8 has a double bandwidth cache. This means that
- these cores can do both an instruction fetch and a data fetch in
- a single cycle, so the trick of calculating the address into a
- scratch register (one of the result regs) and then doing a load
- multiple actually becomes slower (and no smaller in code size).
- That is the transformation
-
- ldr rd1, [rbase + offset]
- ldr rd2, [rbase + offset + 4]
-
- to
-
- add rd1, rbase, offset
- ldmia rd1, {rd1, rd2}
-
- produces worse code -- '3 cycles + any stalls on rd2' instead of
- '2 cycles + any stalls on rd2'. On ARMs with only one cache
- access per cycle, the first sequence could never complete in less
- than 6 cycles, whereas the ldm sequence would only take 5 and
- would make better use of sequential accesses if not hitting the
- cache.
-
- We cheat here and test 'arm_ld_sched' which we currently know to
- only be true for the ARM8, ARM9 and StrongARM. If this ever
- changes, then the test below needs to be reworked. */
- if (nops == 2 && arm_ld_sched)
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops, base_reg_rtx))
return 0;
- /* Can't do it without setting up the offset, only do this if it takes
- no more than one insn. */
- return (const_ok_for_arm (unsorted_offsets[order[0]])
- || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
-}
-
-const char *
-emit_ldm_seq (rtx *operands, int nops)
-{
- int regs[4];
- int base_reg;
- HOST_WIDE_INT offset;
- char buf[100];
- int i;
-
- switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
- {
- case 1:
- strcpy (buf, "ldm%(ia%)\t");
- break;
-
- case 2:
- strcpy (buf, "ldm%(ib%)\t");
- break;
-
- case 3:
- strcpy (buf, "ldm%(da%)\t");
- break;
-
- case 4:
- strcpy (buf, "ldm%(db%)\t");
- break;
-
- case 5:
- if (offset >= 0)
- sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
- reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
- (long) offset);
- else
- sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
- reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
- (long) -offset);
- output_asm_insn (buf, operands);
- base_reg = regs[0];
- strcpy (buf, "ldm%(ia%)\t");
- break;
-
- default:
- gcc_unreachable ();
- }
-
- sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
- reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
-
- for (i = 1; i < nops; i++)
- sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
- reg_names[regs[i]]);
-
- strcat (buf, "}\t%@ phole ldm");
+ if (unsorted_offsets[order[0]] == 0)
+ ldm_case = 1; /* ldmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ ldm_case = 2; /* ldmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ ldm_case = 3; /* ldmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ ldm_case = 4; /* ldmdb */
+ else if (const_ok_for_arm (unsorted_offsets[order[0]])
+ || const_ok_for_arm (-unsorted_offsets[order[0]]))
+ ldm_case = 5;
+ else
+ return 0;
- output_asm_insn (buf, operands);
- return "";
-}
+ if (!multiple_operation_profitable_p (false, nops,
+ ldm_case == 5
+ ? unsorted_offsets[order[0]] : 0))
+ return 0;
-int
-store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
- HOST_WIDE_INT * load_offset)
-{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ return ldm_case;
+}
+
+/* Used to determine in a peephole whether a sequence of store instructions can
+ be changed into a store-multiple instruction.
+ NOPS is the number of separate store instructions we are examining.
+ NOPS_TOTAL is the total number of instructions recognized by the peephole
+ pattern.
+ The first NOPS entries in OPERANDS are the source registers, the next
+ NOPS entries are memory operands. If this function is successful, *BASE is
+ set to the common base register of the memory accesses; *LOAD_OFFSET is set
+ to the first memory location's offset from that base register. REGS is an
+ array filled in with the source register numbers, REG_RTXS (if nonnull) is
+ likewise filled with the corresponding rtx's.
+ SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
+ numbers to to an ascending order of stores.
+ If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
+ from ascending memory locations, and the function verifies that the register
+ numbers are themselves ascending. If CHECK_REGS is false, the register
+ numbers are stored in the order they are found in the operands. */
+static int
+store_multiple_sequence (rtx *operands, int nops, int nops_total,
+ int *regs, rtx *reg_rtxs, int *saved_order, int *base,
+ HOST_WIDE_INT *load_offset, bool check_regs)
+{
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
int base_reg = -1;
- int i;
+ rtx base_reg_rtx = NULL;
+ int i, stm_case;
- /* Can only handle 2, 3, or 4 insns at present, though could be easily
- extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
+
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
&& (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
== CONST_INT)))
{
+ unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
+ ? operands[i] : SUBREG_REG (operands[i]));
+ unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
+
if (i == 0)
{
base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
- }
- else
- {
- if (base_reg != (int) REGNO (reg))
- /* Not addressed from the same base register. */
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
}
+ else if (base_reg != (int) REGNO (reg))
+ /* Not addressed from the same base register. */
+ return 0;
/* If it isn't an integer register, then we can't do this. */
- if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
+ || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
+ || unsorted_regs[i] > 14)
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
-
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
-
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
if (base)
{
*base = base_reg;
for (i = 0; i < nops; i++)
- regs[i] = unsorted_regs[order[i]];
+ {
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
+ if (reg_rtxs)
+ reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
+ }
*load_offset = unsorted_offsets[order[0]];
}
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops_total, base_reg_rtx))
+ return 0;
+
if (unsorted_offsets[order[0]] == 0)
- return 1; /* stmia */
+ stm_case = 1; /* stmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ stm_case = 2; /* stmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ stm_case = 3; /* stmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ stm_case = 4; /* stmdb */
+ else
+ return 0;
+
+ if (!multiple_operation_profitable_p (false, nops, 0))
+ return 0;
+
+ return stm_case;
+}
+\f
+/* Routines for use in generating RTL. */
+
+/* Generate a load-multiple instruction. COUNT is the number of loads in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
+{
+ int i = 0, j;
+ rtx result;
- if (unsorted_offsets[order[0]] == 4)
- return 2; /* stmib */
+ if (!multiple_operation_profitable_p (false, count, 0))
+ {
+ rtx seq;
+
+ start_sequence ();
- if (unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* stmda */
+ for (i = 0; i < count; i++)
+ emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* stmdb */
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
- return 0;
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
+ }
+
+ result = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
+ {
+ XVECEXP (result, 0, 0)
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
+ i = 1;
+ count++;
+ }
+
+ for (j = 0; i < count; i++, j++)
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
+
+ return result;
}
-const char *
-emit_stm_seq (rtx *operands, int nops)
+/* Generate a store-multiple instruction. COUNT is the number of stores in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
{
- int regs[4];
- int base_reg;
- HOST_WIDE_INT offset;
- char buf[100];
- int i;
+ int i = 0, j;
+ rtx result;
+
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
- switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
+ if (!multiple_operation_profitable_p (false, count, 0))
{
- case 1:
- strcpy (buf, "stm%(ia%)\t");
- break;
+ rtx seq;
- case 2:
- strcpy (buf, "stm%(ib%)\t");
- break;
+ start_sequence ();
- case 3:
- strcpy (buf, "stm%(da%)\t");
- break;
+ for (i = 0; i < count; i++)
+ emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
- case 4:
- strcpy (buf, "stm%(db%)\t");
- break;
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
- default:
- gcc_unreachable ();
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
}
- sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
- reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
+ result = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
+ {
+ XVECEXP (result, 0, 0)
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
+ i = 1;
+ count++;
+ }
- for (i = 1; i < nops; i++)
- sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
- reg_names[regs[i]]);
+ for (j = 0; i < count; i++, j++)
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
- strcat (buf, "}\t%@ phole stm");
+ return result;
+}
- output_asm_insn (buf, operands);
- return "";
+/* Generate either a load-multiple or a store-multiple instruction. This
+ function can be used in situations where we can start with a single MEM
+ rtx and adjust its address upwards.
+ COUNT is the number of operations in the instruction, not counting a
+ possible update of the base register. REGS is an array containing the
+ register operands.
+ BASEREG is the base register to be used in addressing the memory operands,
+ which are constructed from BASEMEM.
+ WRITE_BACK specifies whether the generated instruction should include an
+ update of the base register.
+ OFFSETP is used to pass an offset to and from this function; this offset
+ is not used when constructing the address (instead BASEMEM should have an
+ appropriate offset in its address), it is used only for setting
+ MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
+
+static rtx
+arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
+ bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ rtx mems[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT offset = *offsetp;
+ int i;
+
+ gcc_assert (count <= MAX_LDM_STM_OPS);
+
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
+
+ for (i = 0; i < count; i++)
+ {
+ rtx addr = plus_constant (basereg, i * 4);
+ mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
+ offset += 4;
+ }
+
+ if (write_back)
+ *offsetp = offset;
+
+ if (is_load)
+ return arm_gen_load_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
+ else
+ return arm_gen_store_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
}
-\f
-/* Routines for use in generating RTL. */
rtx
-arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
- int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
{
- HOST_WIDE_INT offset = *offsetp;
- int i = 0, j;
- rtx result;
- int sign = up ? 1 : -1;
- rtx mem, addr;
+ return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
- /* XScale has load-store double instructions, but they have stricter
- alignment requirements than load-store multiple, so we cannot
- use them.
+rtx
+arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
- For XScale ldm requires 2 + NREGS cycles to complete and blocks
- the pipeline until completion.
+/* Called from a peephole2 expander to turn a sequence of loads into an
+ LDM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate loads we are trying to combine. SORT_REGS
+ is true if we can reorder the registers because they are used commutatively
+ subsequently.
+ Returns true iff we could generate a new instruction. */
- NREGS CYCLES
- 1 3
- 2 4
- 3 5
- 4 6
+bool
+gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
+{
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int i, j, base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int ldm_case;
+ rtx addr;
- An ldr instruction takes 1-3 cycles, but does not block the
- pipeline.
+ ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
+ &base_reg, &offset, !sort_regs);
- NREGS CYCLES
- 1 1-3
- 2 2-6
- 3 3-9
- 4 4-12
+ if (ldm_case == 0)
+ return false;
- Best case ldr will always win. However, the more ldr instructions
- we issue, the less likely we are to be able to schedule them well.
- Using ldr instructions also increases code size.
+ if (sort_regs)
+ for (i = 0; i < nops - 1; i++)
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] > regs[j])
+ {
+ int t = regs[i];
+ regs[i] = regs[j];
+ regs[j] = t;
+ }
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
+ gcc_assert (ldm_case == 1 || ldm_case == 5);
+ write_back = TRUE;
+ }
+
+ if (ldm_case == 5)
+ {
+ rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
+ emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ if (!TARGET_THUMB1)
+ {
+ base_reg = regs[0];
+ base_reg_rtx = newbase;
+ }
+ }
- As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
- for counts of 3 or 4 regs. */
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
+ for (i = 0; i < nops; i++)
{
- rtx seq;
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
- start_sequence ();
+/* Called from a peephole2 expander to turn a sequence of stores into an
+ STM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate stores we are trying to combine.
+ Returns true iff we could generate a new instruction. */
- for (i = 0; i < count; i++)
- {
- addr = plus_constant (from, i * 4 * sign);
- mem = adjust_automodify_address (basemem, SImode, addr, offset);
- emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
- offset += 4 * sign;
- }
+bool
+gen_stm_seq (rtx *operands, int nops)
+{
+ int i;
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
- if (write_back)
- {
- emit_move_insn (from, plus_constant (from, count * 4 * sign));
- *offsetp = offset;
- }
+ stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
+ mem_order, &base_reg, &offset, true);
- seq = get_insns ();
- end_sequence ();
+ if (stm_case == 0)
+ return false;
- return seq;
- }
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
- result = gen_rtx_PARALLEL (VOIDmode,
- rtvec_alloc (count + (write_back ? 1 : 0)));
- if (write_back)
+ base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
+ if (TARGET_THUMB1)
{
- XVECEXP (result, 0, 0)
- = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
- i = 1;
- count++;
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
}
- for (j = 0; i < count; i++, j++)
+ if (stm_case == 5)
{
- addr = plus_constant (from, j * 4 * sign);
- mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
- XVECEXP (result, 0, i)
- = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
- offset += 4 * sign;
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
}
- if (write_back)
- *offsetp = offset;
+ addr = plus_constant (base_reg_rtx, offset);
- return result;
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
}
-rtx
-arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
- int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+/* Called from a peephole2 expander to turn a sequence of stores that are
+ preceded by constant loads into an STM instruction. OPERANDS are the
+ operands found by the peephole matcher; NOPS indicates how many
+ separate stores we are trying to combine; there are 2 * NOPS
+ instructions in the peephole.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_const_stm_seq (rtx *operands, int nops)
{
- HOST_WIDE_INT offset = *offsetp;
- int i = 0, j;
- rtx result;
- int sign = up ? 1 : -1;
- rtx mem, addr;
+ int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
+ int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
+ int i, j;
+ HARD_REG_SET allocated;
- /* See arm_gen_load_multiple for discussion of
- the pros/cons of ldm/stm usage for XScale. */
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
- {
- rtx seq;
+ stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
+ mem_order, &base_reg, &offset, false);
- start_sequence ();
+ if (stm_case == 0)
+ return false;
- for (i = 0; i < count; i++)
- {
- addr = plus_constant (to, i * 4 * sign);
- mem = adjust_automodify_address (basemem, SImode, addr, offset);
- emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
- offset += 4 * sign;
- }
+ memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
- if (write_back)
- {
- emit_move_insn (to, plus_constant (to, count * 4 * sign));
- *offsetp = offset;
- }
+ /* If the same register is used more than once, try to find a free
+ register. */
+ CLEAR_HARD_REG_SET (allocated);
+ for (i = 0; i < nops; i++)
+ {
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] == regs[j])
+ {
+ rtx t = peep2_find_free_register (0, nops * 2,
+ TARGET_THUMB1 ? "l" : "r",
+ SImode, &allocated);
+ if (t == NULL_RTX)
+ return false;
+ reg_rtxs[i] = t;
+ regs[i] = REGNO (t);
+ }
+ }
- seq = get_insns ();
- end_sequence ();
+ /* Compute an ordering that maps the register numbers to an ascending
+ sequence. */
+ reg_order[0] = 0;
+ for (i = 0; i < nops; i++)
+ if (regs[i] < regs[reg_order[0]])
+ reg_order[0] = i;
- return seq;
+ for (i = 1; i < nops; i++)
+ {
+ int this_order = reg_order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (regs[j] > regs[reg_order[i - 1]]
+ && (this_order == reg_order[i - 1]
+ || regs[j] < regs[this_order]))
+ this_order = j;
+ reg_order[i] = this_order;
}
- result = gen_rtx_PARALLEL (VOIDmode,
- rtvec_alloc (count + (write_back ? 1 : 0)));
- if (write_back)
+ /* Ensure that registers that must be live after the instruction end
+ up with the correct value. */
+ for (i = 0; i < nops; i++)
{
- XVECEXP (result, 0, 0)
- = gen_rtx_SET (VOIDmode, to,
- plus_constant (to, count * 4 * sign));
- i = 1;
- count++;
+ int this_order = reg_order[i];
+ if ((this_order != mem_order[i]
+ || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
+ && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
+ return false;
}
- for (j = 0; i < count; i++, j++)
+ /* Load the constants. */
+ for (i = 0; i < nops; i++)
{
- addr = plus_constant (to, j * 4 * sign);
- mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
- XVECEXP (result, 0, i)
- = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
- offset += 4 * sign;
+ rtx op = operands[2 * nops + mem_order[i]];
+ sorted_regs[i] = regs[reg_order[i]];
+ emit_move_insn (reg_rtxs[reg_order[i]], op);
}
- if (write_back)
- *offsetp = offset;
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
- return result;
+ base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
+ }
+
+ if (stm_case == 5)
+ {
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ }
+
+ addr = plus_constant (base_reg_rtx, offset);
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
}
int
for (i = 0; in_words_to_go >= 2; i+=4)
{
if (in_words_to_go > 4)
- emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
- srcbase, &srcoffset));
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
+ TRUE, srcbase, &srcoffset));
else
- emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
- FALSE, srcbase, &srcoffset));
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
+ src, FALSE, srcbase,
+ &srcoffset));
if (out_words_to_go)
{
if (out_words_to_go > 4)
- emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
- dstbase, &dstoffset));
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
+ TRUE, dstbase, &dstoffset));
else if (out_words_to_go != 1)
- emit_insn (arm_gen_store_multiple (0, out_words_to_go,
- dst, TRUE,
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
+ out_words_to_go, dst,
(last_bytes == 0
? FALSE : TRUE),
dstbase, &dstoffset));
/* A compare with a shifted operand. Because of canonicalization, the
comparison will have to be swapped when we emit the assembler. */
- if (GET_MODE (y) == SImode && GET_CODE (y) == REG
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|| GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
|| GET_CODE (x) == ROTATERT))
/* This operation is performed swapped, but since we only rely on the Z
flag we don't need an additional mode. */
- if (GET_MODE (y) == SImode && REG_P (y)
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
&& GET_CODE (x) == NEG
&& (op == EQ || op == NE))
return CC_Zmode;
&& (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
return CC_Cmode;
+ if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
+ {
+ /* To keep things simple, always use the Cirrus cfcmp64 if it is
+ available. */
+ if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ return CCmode;
+
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ /* A DImode comparison against zero can be implemented by
+ or'ing the two halves together. */
+ if (y == const0_rtx)
+ return CC_Zmode;
+
+ /* We can do an equality test in three Thumb instructions. */
+ if (!TARGET_ARM)
+ return CC_Zmode;
+
+ /* FALLTHROUGH */
+
+ case LTU:
+ case LEU:
+ case GTU:
+ case GEU:
+ /* DImode unsigned comparisons can be implemented by cmp +
+ cmpeq without a scratch register. Not worth doing in
+ Thumb-2. */
+ if (TARGET_ARM)
+ return CC_CZmode;
+
+ /* FALLTHROUGH */
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ /* DImode signed and unsigned comparisons can be implemented
+ by cmp + sbcs with a scratch register, but that does not
+ set the Z flag - we must reverse GT/LE/GTU/LEU. */
+ gcc_assert (op != EQ && op != NE);
+ return CC_NCVmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
return CCmode;
}
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
- enum machine_mode mode = SELECT_CC_MODE (code, x, y);
- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+ enum machine_mode mode;
+ rtx cc_reg;
+ int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
+
+ /* We might have X as a constant, Y as a register because of the predicates
+ used for cmpdi. If so, force X to a register here. */
+ if (dimode_comparison && !REG_P (x))
+ x = force_reg (DImode, x);
+
+ mode = SELECT_CC_MODE (code, x, y);
+ cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+
+ if (dimode_comparison
+ && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ && mode != CC_CZmode)
+ {
+ rtx clobber, set;
- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ /* To compare two non-zero values for equality, XOR them and
+ then compare against zero. Not used for ARM mode; there
+ CC_CZmode is cheaper. */
+ if (mode == CC_Zmode && y != const0_rtx)
+ {
+ x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
+ y = const0_rtx;
+ }
+ /* A scratch register is required. */
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
+ set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ }
+ else
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
return cc_reg;
}
its maximum address (which can happen if we have
re-located a forwards fix); force the new fix to come
after it. */
- min_mp = mp;
- min_address = mp->min_address + fix->fix_size;
+ if (ARM_DOUBLEWORD_ALIGN
+ && fix->fix_size >= 8 && mp->fix_size < 8)
+ return NULL;
+ else
+ {
+ min_mp = mp;
+ min_address = mp->min_address + fix->fix_size;
+ }
}
- /* If we are inserting an 8-bytes aligned quantity and
- we have not already found an insertion point, then
- make sure that all such 8-byte aligned quantities are
- placed at the start of the pool. */
+ /* Do not insert a non-8-byte aligned quantity before 8-byte
+ aligned quantities. */
else if (ARM_DOUBLEWORD_ALIGN
- && min_mp == NULL
- && fix->fix_size >= 8
- && mp->fix_size < 8)
+ && fix->fix_size < 8
+ && mp->fix_size >= 8)
{
min_mp = mp;
min_address = mp->min_address + fix->fix_size;
return false;
}
+/* Return true if it is possible to inline both the high and low parts
+ of a 64-bit constant into 32-bit data processing instructions. */
+bool
+arm_const_double_by_immediates (rtx val)
+{
+ enum machine_mode mode = GET_MODE (val);
+ rtx part;
+
+ if (mode == VOIDmode)
+ mode = DImode;
+
+ part = gen_highpart_mode (SImode, mode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (!const_ok_for_arm (INTVAL (part)))
+ return false;
+
+ part = gen_lowpart (SImode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (!const_ok_for_arm (INTVAL (part)))
+ return false;
+
+ return true;
+}
+
/* Scan INSN and note any of its operands that need fixing.
If DO_PUSHES is false we do not actually push any of the fixups
needed. The function returns TRUE if any fixups were needed/pushed.
return result;
}
+/* Convert instructions to their cc-clobbering variant if possible, since
+ that allows us to use smaller encodings. */
+
+static void
+thumb2_reorg (void)
+{
+ basic_block bb;
+ regset_head live;
+
+ INIT_REG_SET (&live);
+
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+ df_analyze ();
+
+ FOR_EACH_BB (bb)
+ {
+ rtx insn;
+ COPY_REG_SET (&live, DF_LR_OUT (bb));
+ df_simulate_initialize_backwards (bb, &live);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (NONJUMP_INSN_P (insn)
+ && !REGNO_REG_SET_P (&live, CC_REGNUM))
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SET
+ && low_register_operand (XEXP (pat, 0), SImode)
+ && thumb_16bit_operator (XEXP (pat, 1), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+ {
+ rtx dst = XEXP (pat, 0);
+ rtx src = XEXP (pat, 1);
+ rtx op0 = XEXP (src, 0);
+ if (rtx_equal_p (dst, op0)
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec = gen_rtvec (2, pat, clobber);
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+ if (NONDEBUG_INSN_P (insn))
+ df_simulate_one_insn_backwards (bb, insn, &live);
+ }
+ }
+ CLEAR_REG_SET (&live);
+}
+
/* Gcc puts the pool in the wrong place for ARM, since we can only
load addresses a limited distance around the pc. We do some
special munging to move the constant pool values to the correct
HOST_WIDE_INT address = 0;
Mfix * fix;
+ if (TARGET_THUMB2)
+ thumb2_reorg ();
+
minipool_fix_head = minipool_fix_tail = NULL;
/* The first insn must always be a note, or the code below won't
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ - (count * 8)))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
}
par = emit_insn (par);
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
RTX_FRAME_RELATED_P (par) = 1;
return count * 8;
return "";
}
-/* Output a 'call' insn that is a reference in memory. */
+/* Output a 'call' insn that is a reference in memory. This is
+ disabled for ARMv5 and we prefer a blx instead because otherwise
+ there's a significant performance overhead. */
const char *
output_call_mem (rtx *operands)
{
- if (TARGET_INTERWORK && !arm_arch5)
+ gcc_assert (!arm_arch5);
+ if (TARGET_INTERWORK)
{
output_asm_insn ("ldr%?\t%|ip, %0", operands);
output_asm_insn ("mov%?\t%|lr, %|pc", operands);
first instruction. It's safe to use IP as the target of the
load since the call will kill it anyway. */
output_asm_insn ("ldr%?\t%|ip, %0", operands);
- if (arm_arch5)
- output_asm_insn ("blx%?\t%|ip", operands);
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ if (arm_arch4t)
+ output_asm_insn ("bx%?\t%|ip", operands);
else
- {
- output_asm_insn ("mov%?\t%|lr, %|pc", operands);
- if (arm_arch4t)
- output_asm_insn ("bx%?\t%|ip", operands);
- else
- output_asm_insn ("mov%?\t%|pc, %|ip", operands);
- }
+ output_asm_insn ("mov%?\t%|pc, %|ip", operands);
}
else
{
return "";
}
+void
+arm_emit_movpair (rtx dest, rtx src)
+ {
+ /* If the src is an immediate, simplify it. */
+ if (CONST_INT_P (src))
+ {
+ HOST_WIDE_INT val = INTVAL (src);
+ emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
+ if ((val >> 16) & 0x0000ffff)
+ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
+ GEN_INT (16)),
+ GEN_INT ((val >> 16) & 0x0000ffff));
+ return;
+ }
+ emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
+ emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
+ }
/* Output a move from arm registers to an fpa registers.
OPERANDS[0] is an fpa register.
if (code0 == REG)
{
- int reg0 = REGNO (operands[0]);
+ unsigned int reg0 = REGNO (operands[0]);
otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
switch (GET_CODE (XEXP (operands[1], 0)))
{
case REG:
- if (TARGET_LDRD)
+ if (TARGET_LDRD
+ && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
else
output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
case PRE_MODIFY:
case POST_MODIFY:
+ /* Autoicrement addressing modes should never have overlapping
+ base and destination registers, and overlapping index registers
+ are already prohibited, so this doesn't need to worry about
+ fix_cm3_ldrd. */
otherops[0] = operands[0];
otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
}
else
{
- /* IWMMXT allows offsets larger than ldrd can handle,
- fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
- && (INTVAL(otherops[2]) <= -256
- || INTVAL(otherops[2]) >= 256))
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can
+ handle, fix these up with a pair of ldr. */
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
+ else
{
output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
- otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
}
- else
- output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
}
}
else
{
- /* IWMMXT allows offsets larger than ldrd can handle,
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
- && (INTVAL(otherops[2]) <= -256
- || INTVAL(otherops[2]) >= 256))
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
+ else
{
- otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
- otherops[0] = operands[0];
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
}
- else
- /* We only allow constant increments, so this is safe. */
- output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
}
break;
/* We might be able to use ldrd %0, %1 here. However the range is
different to ldr/adr, and it is broken on some ARMv7-M
implementations. */
- output_asm_insn ("adr%?\t%0, %1", operands);
+ /* Use the second register of the pair to avoid problematic
+ overlap. */
+ otherops[1] = operands[1];
+ output_asm_insn ("adr%?\t%0, %1", otherops);
+ operands[1] = otherops[0];
if (TARGET_LDRD)
- output_asm_insn ("ldr%(d%)\t%0, [%0]", operands);
+ output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
else
- output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
+ output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
break;
/* ??? This needs checking for thumb2. */
return "";
}
}
+ otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
+ operands[1] = otherops[0];
if (TARGET_LDRD
&& (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
&& INTVAL (otherops[2]) > -256
&& INTVAL (otherops[2]) < 256)))
{
- if (reg_overlap_mentioned_p (otherops[0],
+ if (reg_overlap_mentioned_p (operands[0],
otherops[2]))
{
+ rtx tmp;
/* Swap base and index registers over to
avoid a conflict. */
- otherops[1] = XEXP (XEXP (operands[1], 0), 1);
- otherops[2] = XEXP (XEXP (operands[1], 0), 0);
+ tmp = otherops[1];
+ otherops[1] = otherops[2];
+ otherops[2] = tmp;
}
/* If both registers conflict, it will usually
have been fixed by a splitter. */
- if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
+ if (reg_overlap_mentioned_p (operands[0], otherops[2])
+ || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
{
- output_asm_insn ("add%?\t%1, %1, %2", otherops);
- output_asm_insn ("ldr%(d%)\t%0, [%1]",
- otherops);
+ output_asm_insn ("add%?\t%0, %1, %2", otherops);
+ output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
}
else
- output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
+ {
+ otherops[0] = operands[0];
+ output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
+ }
return "";
}
output_asm_insn ("sub%?\t%0, %1, %2", otherops);
if (TARGET_LDRD)
- return "ldr%(d%)\t%0, [%0]";
+ return "ldr%(d%)\t%0, [%1]";
- return "ldm%(ia%)\t%0, %M0";
+ return "ldm%(ia%)\t%1, %M0";
}
else
{
/* IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (GET_CODE (otherops[2]) == CONST_INT
+ if (!TARGET_THUMB2
+ && GET_CODE (otherops[2]) == CONST_INT
&& (INTVAL(otherops[2]) <= -256
|| INTVAL(otherops[2]) >= 256))
{
- rtx reg1;
- reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
{
- output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
- otherops[0] = reg1;
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
+ output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
}
else
{
- otherops[0] = reg1;
- output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
- otherops[0] = operands[1];
- output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
+ output_asm_insn ("str%?\t%0, [%1], %2", otherops);
}
}
else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
}
if (TARGET_LDRD
&& (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
&& INTVAL (otherops[2]) > -256
&& INTVAL (otherops[2]) < 256)))
default:
otherops[0] = adjust_address (operands[0], SImode, 4);
- otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
+ otherops[1] = operands[1];
output_asm_insn ("str%?\t%1, %0", operands);
- output_asm_insn ("str%?\t%1, %0", otherops);
+ output_asm_insn ("str%?\t%H1, %0", otherops);
}
}
}
/* Output a move, load or store for quad-word vectors in ARM registers. Only
- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
const char *
output_move_quad (rtx *operands)
int load = REG_P (operands[0]);
int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
- const char *template;
+ const char *templ;
char buff[50];
enum machine_mode mode;
switch (GET_CODE (addr))
{
case PRE_DEC:
- template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
+ templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
ops[0] = XEXP (addr, 0);
ops[1] = reg;
break;
case POST_INC:
- template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
+ templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
ops[0] = XEXP (addr, 0);
ops[1] = reg;
break;
default:
- template = "f%s%c%%?\t%%%s0, %%1%s";
+ templ = "f%s%c%%?\t%%%s0, %%1%s";
ops[0] = reg;
ops[1] = mem;
break;
}
- sprintf (buff, template,
+ sprintf (buff, templ,
load ? "ld" : "st",
dp ? 'd' : 's',
dp ? "P" : "",
}
/* Output a Neon quad-word load or store, or a load or store for
- larger structure modes. We could also support post-modify forms using
- VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
- yet.
- WARNING: The ordering of elements in memory is weird in big-endian mode,
- because we use VSTM instead of VST1, to make it easy to make vector stores
- via ARM registers write values in the same order as stores direct from Neon
- registers. For example, the byte ordering of a quadword vector with 16-byte
- elements like this:
+ larger structure modes.
- [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
+ WARNING: The ordering of elements is weird in big-endian mode,
+ because we use VSTM, as required by the EABI. GCC RTL defines
+ element ordering based on in-memory order. This can be differ
+ from the architectural ordering of elements within a NEON register.
+ The intrinsics defined in arm_neon.h use the NEON register element
+ ordering, not the GCC RTL element ordering.
- will be (with lowest address first, h = most-significant byte,
- l = least-significant byte of element):
+ For example, the in-memory ordering of a big-endian a quadword
+ vector with 16-bit elements when stored from register pair {d0,d1}
+ will be (lowest address first, d0[N] is NEON register element N):
- [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
- e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
+ [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
- When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
- rN in the order:
+ When necessary, quadword registers (dN, dN+1) are moved to ARM
+ registers from rN in the order:
dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
- So that STM/LDM can be used on vectors in ARM registers, and the same memory
- layout will result as if VSTM/VLDM were used. */
+ So that STM/LDM can be used on vectors in ARM registers, and the
+ same memory layout will result as if VSTM/VLDM were used. */
const char *
output_move_neon (rtx *operands)
{
rtx reg, mem, addr, ops[2];
int regno, load = REG_P (operands[0]);
- const char *template;
+ const char *templ;
char buff[50];
enum machine_mode mode;
switch (GET_CODE (addr))
{
case POST_INC:
- template = "v%smia%%?\t%%0!, %%h1";
+ templ = "v%smia%%?\t%%0!, %%h1";
ops[0] = XEXP (addr, 0);
ops[1] = reg;
break;
+ case PRE_DEC:
+ /* FIXME: We should be using vld1/vst1 here in BE mode? */
+ templ = "v%smdb%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
case POST_MODIFY:
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
gcc_unreachable ();
{
/* We're only using DImode here because it's a convenient size. */
ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
- ops[1] = adjust_address (mem, SImode, 8 * i);
+ ops[1] = adjust_address (mem, DImode, 8 * i);
if (reg_overlap_mentioned_p (ops[0], mem))
{
gcc_assert (overlap == -1);
}
default:
- template = "v%smia%%?\t%%m0, %%h1";
+ templ = "v%smia%%?\t%%m0, %%h1";
ops[0] = mem;
ops[1] = reg;
}
- sprintf (buff, template, load ? "ld" : "st");
+ sprintf (buff, templ, load ? "ld" : "st");
output_asm_insn (buff, ops);
return "";
}
+/* Compute and return the length of neon_mov<mode>, where <mode> is
+ one of VSTRUCT modes: EI, OI, CI or XI. */
+int
+arm_attr_length_move_neon (rtx insn)
+{
+ rtx reg, mem, addr;
+ int load;
+ enum machine_mode mode;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
+ {
+ mode = GET_MODE (recog_data.operand[0]);
+ switch (mode)
+ {
+ case EImode:
+ case OImode:
+ return 8;
+ case CImode:
+ return 12;
+ case XImode:
+ return 16;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ load = REG_P (recog_data.operand[0]);
+ reg = recog_data.operand[!load];
+ mem = recog_data.operand[load];
+
+ gcc_assert (MEM_P (mem));
+
+ mode = GET_MODE (reg);
+ addr = XEXP (mem, 0);
+
+ /* Strip off const from addresses like (const (plus (...))). */
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
+ {
+ int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
+ return insns * 4;
+ }
+ else
+ return 4;
+}
+
+/* Return nonzero if the offset in the address is an immediate. Otherwise,
+ return zero. */
+
+int
+arm_address_offset_is_imm (rtx insn)
+{
+ rtx mem, addr;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]))
+ return 0;
+
+ mem = recog_data.operand[0];
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (GET_CODE (addr) == REG
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT))
+ return 1;
+ else
+ return 0;
+}
+
/* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */
const char *
&& crtl->uses_pic_offset_table)
save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
}
+ else if (IS_VOLATILE(func_type))
+ {
+ /* For noreturn functions we historically omitted register saves
+ altogether. However this really messes up debugging. As a
+ compromise save just the frame pointers. Combined with the link
+ register saved elsewhere this should be sufficient to get
+ a backtrace. */
+ if (frame_pointer_needed)
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
+ }
else
{
/* In the normal case we only need to save those registers
}
+/* Compute the number of bytes used to store the static chain register on the
+ stack, above the stack frame. We need to know this accurately to get the
+ alignment of the rest of the stack frame correct. */
+
+static int arm_compute_static_chain_stack_bytes (void)
+{
+ unsigned long func_type = arm_current_func_type ();
+ int static_chain_stack_bytes = 0;
+
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
+ IS_NESTED (func_type) &&
+ df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
+ static_chain_stack_bytes = 4;
+
+ return static_chain_stack_bytes;
+}
+
+
/* Compute a bit mask of which registers need to be
saved on the stack for the current function.
This is used by arm_get_frame_offsets, which may add extra registers. */
| (1 << LR_REGNUM)
| (1 << PC_REGNUM);
- /* Volatile functions do not return, so there
- is no need to save any other registers. */
- if (IS_VOLATILE (func_type))
- return save_reg_mask;
-
save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
/* Decide if we need to save the link register.
if (TARGET_REALLY_IWMMXT
&& ((bit_count (save_reg_mask)
- + ARM_NUM_INTS (crtl->args.pretend_args_size)) % 2) != 0)
+ + ARM_NUM_INTS (crtl->args.pretend_args_size +
+ arm_compute_static_chain_stack_bytes())
+ ) % 2) != 0)
{
/* The total number of registers that are going to be pushed
onto the stack is odd. We need to ensure that the stack
mask |= 1 << reg;
}
+ /* The 504 below is 8 bytes less than 512 because there are two possible
+ alignment words. We can't tell here if they will be present or not so we
+ have to play it safe and assume that they are. */
+ if ((CALLER_INTERWORKING_SLOT_SIZE +
+ ROUND_UP_WORD (get_frame_size ()) +
+ crtl->outgoing_args_size) >= 504)
+ {
+ /* This is the same as the code in thumb1_expand_prologue() which
+ determines which register to use for stack decrement. */
+ for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
+ if (mask & (1 << reg))
+ break;
+
+ if (reg > LAST_LO_REGNUM)
+ {
+ /* Make sure we have a register available for stack decrement. */
+ mask |= 1 << LAST_LO_REGNUM;
+ }
+ }
+
return mask;
}
sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
- return_used_this_function = 1;
+ cfun->machine->return_used_this_function = 1;
offsets = arm_get_frame_offsets ();
live_regs_mask = offsets->saved_regs_mask;
gcc_assert (stack_adjust == 0 || stack_adjust == 4);
if (stack_adjust && arm_arch5 && TARGET_ARM)
- sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
else
{
/* If we can't use ldmib (SA110 bug),
then try to pop r3 instead. */
if (stack_adjust)
live_regs_mask |= 1 << 3;
- sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
+
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
}
}
else
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
+ if (TARGET_UNIFIED_ASM)
+ sprintf (instr, "pop%s\t{", conditional);
+ else
+ sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
p = instr + strlen (instr);
if (crtl->calls_eh_return)
asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
- return_used_this_function = 0;
}
const char *
/* If we have already generated the return instruction
then it is futile to generate anything else. */
- if (use_return_insn (FALSE, sibling) && return_used_this_function)
+ if (use_return_insn (FALSE, sibling) &&
+ (cfun->machine->return_used_this_function != 0))
return "";
func_type = arm_current_func_type ();
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
int vfp_offset = offsets->frame;
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
(where frame pointer is required to point at first register)
and ARM-non-apcs-frame. Therefore, such change is postponed
until real need arise. */
- HOST_WIDE_INT amount;
+ unsigned HOST_WIDE_INT amount;
int rfe;
/* Restore stack pointer if necessary. */
if (TARGET_ARM && frame_pointer_needed)
&& !crtl->tail_call_emit)
{
unsigned long mask;
- mask = (1 << (arm_size_return_regs() / 4)) - 1;
+ /* Preserve return values, of any size. */
+ mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
mask ^= 0xf;
mask &= ~saved_regs_mask;
reg = 0;
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
}
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
if (TARGET_HARD_FLOAT && TARGET_VFP)
{
- start_reg = FIRST_VFP_REGNUM;
- for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
+ int end_reg = LAST_VFP_REGNUM + 1;
+
+ /* Scan the registers in reverse order. We need to match
+ any groupings made in the prologue and generate matching
+ pop operations. */
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
{
if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
- && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
+ && (!df_regs_ever_live_p (reg + 1)
+ || call_used_regs[reg + 1]))
{
- if (start_reg != reg)
+ if (end_reg > reg + 2)
vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
- start_reg = reg + 2;
+ (reg + 2 - FIRST_VFP_REGNUM) / 2,
+ (end_reg - (reg + 2)) / 2);
+ end_reg = reg;
}
}
- if (start_reg != reg)
- vfp_output_fldmd (f, SP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
+ if (end_reg > reg + 2)
+ vfp_output_fldmd (f, SP_REGNUM, 0,
+ (end_reg - (reg + 2)) / 2);
}
+
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
/* ??? Probably not safe to set this here, since it assumes that a
function will be emitted as assembly immediately after we generate
RTL for it. This does not happen for inline functions. */
- return_used_this_function = 0;
+ cfun->machine->return_used_this_function = 0;
}
else /* TARGET_32BIT */
{
offsets = arm_get_frame_offsets ();
gcc_assert (!use_return_insn (FALSE, NULL)
- || !return_used_this_function
+ || (cfun->machine->return_used_this_function != 0)
|| offsets->saved_regs == offsets->outgoing_args
|| frame_pointer_needed);
/* For the body of the insn we are going to generate an UNSPEC in
parallel with several USEs. This allows the insn to be recognized
- by the push_multi pattern in the arm.md file. The insn looks
- something like this:
+ by the push_multi pattern in the arm.md file.
+
+ The body of the insn looks something like this:
(parallel [
- (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
+ (set (mem:BLK (pre_modify:SI (reg:SI sp)
+ (const_int:SI <num>)))
(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
- (use (reg:SI 11 fp))
- (use (reg:SI 12 ip))
- (use (reg:SI 14 lr))
- (use (reg:SI 15 pc))
+ (use (reg:SI XX))
+ (use (reg:SI YY))
+ ...
])
For the frame note however, we try to be more explicit and actually
(sequence [
(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
(set (mem:SI (reg:SI sp)) (reg:SI r4))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
+ ...
])
- This sequence is used both by the code to support stack unwinding for
- exceptions handlers and the code to generate dwarf2 frame debugging. */
+ FIXME:: In an ideal world the PRE_MODIFY would not exist and
+ instead we'd have a parallel expression detailing all
+ the stores to the various memory addresses so that debug
+ information is more up-to-date. Remember however while writing
+ this to take care of the constraints with the push instruction.
+
+ Note also that this has to be taken care of for the VFP registers.
+
+ For more see PR43399. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -4 * num_regs))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
{
tmp
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (SImode,
- plus_constant (stack_pointer_rtx,
- 4 * j)),
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx,
+ 4 * j)),
reg);
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, 0) = tmp;
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
return par;
}
XVECEXP (par, 0, 0)
= gen_rtx_SET (VOIDmode,
- gen_frame_mem (BLKmode,
- gen_rtx_PRE_DEC (BLKmode,
- stack_pointer_rtx)),
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -12 * count))
+ ),
gen_rtx_UNSPEC (BLKmode,
gen_rtvec (1, reg),
UNSPEC_PUSH_MULT));
XVECEXP (dwarf, 0, 0) = tmp;
par = emit_insn (par);
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (par));
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
return par;
}
offsets->saved_args = crtl->args.pretend_args_size;
/* In Thumb mode this is incorrect, but never used. */
- offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
+ offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
+ arm_compute_static_chain_stack_bytes();
if (TARGET_32BIT)
{
}
/* Saved registers include the stack frame. */
- offsets->saved_regs = offsets->saved_args + saved;
+ offsets->saved_regs = offsets->saved_args + saved +
+ arm_compute_static_chain_stack_bytes();
offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
on the stack. */
{
int reg = -1;
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
- {
- if ((offsets->saved_regs_mask & (1 << i)) == 0)
- {
- reg = i;
- break;
- }
- }
-
- if (reg == -1 && arm_size_return_regs () <= 12
- && !crtl->tail_call_emit)
+ /* If it is safe to use r3, then do so. This sometimes
+ generates better code on Thumb-2 by avoiding the need to
+ use 32-bit push/pop instructions. */
+ if (!crtl->tail_call_emit
+ && arm_size_return_regs () <= 12
+ && (offsets->saved_regs_mask & (1 << 3)) == 0)
{
- /* Push/pop an argument register (r3) if all callee saved
- registers are already being pushed. */
reg = 3;
}
+ else
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+ {
+ if ((offsets->saved_regs_mask & (1 << i)) == 0)
+ {
+ reg = i;
+ break;
+ }
+ }
if (reg != -1)
{
return offsets->soft_frame - offsets->saved_args;
case ARM_HARD_FRAME_POINTER_REGNUM:
- /* If there is no stack frame then the hard
- frame pointer and the arg pointer coincide. */
- if (offsets->frame == offsets->saved_regs)
- return 0;
- /* FIXME: Not sure about this. Maybe we should always return 0 ? */
- return (frame_pointer_needed
- && cfun->static_chain_decl != NULL
- && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
+ /* This is only non-zero in the case where the static chain register
+ is stored above the frame. */
+ return offsets->frame - offsets->saved_args - 4;
case STACK_POINTER_REGNUM:
/* If nothing has been pushed on the stack at all
}
}
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All eliminations are permissible. Note that ARG_POINTER_REGNUM and
+ HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
+ pointer, we must eliminate FRAME_POINTER_REGNUM into
+ HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
+ ARG_POINTER_REGNUM. */
+
+bool
+arm_can_eliminate (const int from, const int to)
+{
+ return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
+ (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
+ (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
+ (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
+ true);
+}
/* Emit RTL to save coprocessor registers on function entry. Returns the
number of bytes pushed. */
for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
{
- insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
insn = gen_rtx_MEM (V2SImode, insn);
insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
function. */
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
- insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
insn = gen_rtx_MEM (XFmode, insn);
insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
RTX_FRAME_RELATED_P (insn) = 1;
dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
plus_constant (stack_pointer_rtx, amount));
RTX_FRAME_RELATED_P (dwarf) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
RTX_FRAME_RELATED_P (insn) = 1;
r0 = gen_rtx_REG (SImode, 0);
r1 = gen_rtx_REG (SImode, 1);
- dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
+ /* Use a real rtvec rather than NULL_RTVEC so the rest of the
+ compiler won't choke. */
+ dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
insn = gen_movsi (r0, stack_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- dwarf, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
emit_insn (insn);
emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
emit_insn (gen_movsi (stack_pointer_rtx, r1));
{
rtx dwarf;
+ gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
+ saved_regs += 4;
+
insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
fp_offset = 4;
plus_constant (stack_pointer_rtx,
-fp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- dwarf, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
else
{
using the EABI unwinder, to prevent faulting instructions from being
swapped with a stack adjustment. */
if (crtl->profile || !TARGET_SCHED_PROLOG
- || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
+ || (arm_except_unwind_info () == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
emit_insn (gen_blockage ());
/* If the link register is being kept alive, with the return address in it,
before output.
If CODE is 'B' then output a bitwise inverted value of X (a const int).
If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
-void
+static void
arm_print_operand (FILE *stream, rtx x, int code)
{
switch (code)
{
REAL_VALUE_TYPE r;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- r = REAL_VALUE_NEGATE (r);
+ r = real_value_negate (&r);
fprintf (stream, "%s", fp_const_from_val (&r));
}
return;
- /* An integer without a preceding # sign. */
+ /* An integer or symbol address without a preceding # sign. */
case 'c':
- gcc_assert (GET_CODE (x) == CONST_INT);
- fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case SYMBOL_REF:
+ output_addr_const (stream, x);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
return;
case 'B':
the value being loaded is big-wordian or little-wordian. The
order of the two register loads can matter however, if the address
of the memory location is actually held in one of the registers
- being overwritten by the load. */
+ being overwritten by the load.
+
+ The 'Q' and 'R' constraints are also available for 64-bit
+ constants. */
case 'Q':
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ rtx part = gen_lowpart (SImode, x);
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
+ return;
+ }
+
if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
{
output_operand_lossage ("invalid operand for code '%c'", code);
return;
case 'R':
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ enum machine_mode mode = GET_MODE (x);
+ rtx part;
+
+ if (mode == VOIDmode)
+ mode = DImode;
+ part = gen_highpart_mode (SImode, mode, x);
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
+ return;
+ }
+
if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
{
output_operand_lossage ("invalid operand for code '%c'", code);
}
return;
+ /* Print the high single-precision register of a VFP double-precision
+ register. */
+ case 'p':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
+ }
+ return;
+
/* Print a VFP/Neon double precision or quad precision register name. */
case 'P':
case 'q':
}
return;
+ /* Memory operand for vld1/vst1 instruction. */
+ case 'A':
+ {
+ rtx addr;
+ bool postinc = FALSE;
+ unsigned align, modesize, align_bits;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ if (GET_CODE (addr) == POST_INC)
+ {
+ postinc = 1;
+ addr = XEXP (addr, 0);
+ }
+ asm_fprintf (stream, "[%r", REGNO (addr));
+
+ /* We know the alignment of this access, so we can emit a hint in the
+ instruction (for some alignments) as an aid to the memory subsystem
+ of the target. */
+ align = MEM_ALIGN (x) >> 3;
+ modesize = GET_MODE_SIZE (GET_MODE (x));
+
+ /* Only certain alignment specifiers are supported by the hardware. */
+ if (modesize == 16 && (align % 32) == 0)
+ align_bits = 256;
+ else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+ align_bits = 128;
+ else if ((align % 8) == 0)
+ align_bits = 64;
+ else
+ align_bits = 0;
+
+ if (align_bits != 0)
+ asm_fprintf (stream, ":%d", align_bits);
+
+ asm_fprintf (stream, "]");
+
+ if (postinc)
+ fputs("!", stream);
+ }
+ return;
+
+ case 'C':
+ {
+ rtx addr;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ gcc_assert (GET_CODE (addr) == REG);
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ }
+ return;
+
+ /* Translate an S register number into a D register number and element index. */
+ case 'y':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+ }
+ return;
+
+ /* Register specifier for vld1.16/vst1.16. Translate the S register
+ number into a D register number and element index. */
+ case 'z':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
+ }
+ return;
+
default:
if (x == 0)
{
default:
gcc_assert (GET_CODE (x) != NEG);
fputc ('#', stream);
+ if (GET_CODE (x) == HIGH)
+ {
+ fputs (":lower16:", stream);
+ x = XEXP (x, 0);
+ }
+
output_addr_const (stream, x);
break;
}
}
}
\f
+/* Target hook for printing a memory address. */
+static void
+arm_print_operand_address (FILE *stream, rtx x)
+{
+ if (TARGET_32BIT)
+ {
+ int is_minus = GET_CODE (x) == MINUS;
+
+ if (GET_CODE (x) == REG)
+ asm_fprintf (stream, "[%r, #0]", REGNO (x));
+ else if (GET_CODE (x) == PLUS || is_minus)
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+ HOST_WIDE_INT offset = 0;
+ if (GET_CODE (base) != REG
+ || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
+ {
+ /* Ensure that BASE is a register. */
+ /* (one of them must be). */
+ /* Also ensure the SP is not used as in index register. */
+ rtx temp = base;
+ base = index;
+ index = temp;
+ }
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ offset = INTVAL (index);
+ if (is_minus)
+ offset = -offset;
+ asm_fprintf (stream, "[%r, #%wd]",
+ REGNO (base), offset);
+ break;
+
+ case REG:
+ asm_fprintf (stream, "[%r, %s%r]",
+ REGNO (base), is_minus ? "-" : "",
+ REGNO (index));
+ break;
+
+ case MULT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ASHIFT:
+ case ROTATERT:
+ {
+ asm_fprintf (stream, "[%r, %s%r",
+ REGNO (base), is_minus ? "-" : "",
+ REGNO (XEXP (index, 0)));
+ arm_print_operand (stream, index, 'S');
+ fputs ("]", stream);
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
+ {
+ extern enum machine_mode output_memory_reference_mode;
+
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+
+ if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
+ asm_fprintf (stream, "[%r, #%s%d]!",
+ REGNO (XEXP (x, 0)),
+ GET_CODE (x) == PRE_DEC ? "-" : "",
+ GET_MODE_SIZE (output_memory_reference_mode));
+ else
+ asm_fprintf (stream, "[%r], #%s%d",
+ REGNO (XEXP (x, 0)),
+ GET_CODE (x) == POST_DEC ? "-" : "",
+ GET_MODE_SIZE (output_memory_reference_mode));
+ }
+ else if (GET_CODE (x) == PRE_MODIFY)
+ {
+ asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
+ if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
+ asm_fprintf (stream, "#%wd]!",
+ INTVAL (XEXP (XEXP (x, 1), 1)));
+ else
+ asm_fprintf (stream, "%r]!",
+ REGNO (XEXP (XEXP (x, 1), 1)));
+ }
+ else if (GET_CODE (x) == POST_MODIFY)
+ {
+ asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
+ if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
+ asm_fprintf (stream, "#%wd",
+ INTVAL (XEXP (XEXP (x, 1), 1)));
+ else
+ asm_fprintf (stream, "%r",
+ REGNO (XEXP (XEXP (x, 1), 1)));
+ }
+ else output_addr_const (stream, x);
+ }
+ else
+ {
+ if (GET_CODE (x) == REG)
+ asm_fprintf (stream, "[%r]", REGNO (x));
+ else if (GET_CODE (x) == POST_INC)
+ asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
+ else if (GET_CODE (x) == PLUS)
+ {
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ asm_fprintf (stream, "[%r, #%wd]",
+ REGNO (XEXP (x, 0)),
+ INTVAL (XEXP (x, 1)));
+ else
+ asm_fprintf (stream, "[%r, %r]",
+ REGNO (XEXP (x, 0)),
+ REGNO (XEXP (x, 1)));
+ }
+ else
+ output_addr_const (stream, x);
+ }
+}
+\f
+/* Target hook for indicating whether a punctuation character for
+ TARGET_PRINT_OPERAND is valid. */
+static bool
+arm_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '@' || code == '|' || code == '.'
+ || code == '(' || code == ')' || code == '#'
+ || (TARGET_32BIT && (code == '?'))
+ || (TARGET_THUMB2 && (code == '!'))
+ || (TARGET_THUMB && (code == '_')));
+}
+\f
/* Target hook for assembling integer objects. The ARM version needs to
handle word-sized values specially. */
static bool
if (arm_vector_mode_supported_p (mode))
{
int i, units;
- unsigned int invmask = 0, parts_per_word;
gcc_assert (GET_CODE (x) == CONST_VECTOR);
units = CONST_VECTOR_NUNITS (x);
size = GET_MODE_SIZE (GET_MODE_INNER (mode));
- /* For big-endian Neon vectors, we must permute the vector to the form
- which, when loaded by a VLDR or VLDM instruction, will give a vector
- with the elements in the right order. */
- if (TARGET_NEON && WORDS_BIG_ENDIAN)
- {
- parts_per_word = UNITS_PER_WORD / size;
- /* FIXME: This might be wrong for 64-bit vector elements, but we don't
- support those anywhere yet. */
- invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
- }
-
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
for (i = 0; i < units; i++)
{
- rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
+ rtx elt = CONST_VECTOR_ELT (x, i);
assemble_integer
(elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
}
get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
- int code;
+ enum arm_cond_code code;
enum rtx_code comp_code = GET_CODE (comparison);
if (GET_MODE_CLASS (mode) != MODE_CC)
case CC_Cmode:
switch (comp_code)
- {
- case LTU: return ARM_CS;
- case GEU: return ARM_CC;
- default: gcc_unreachable ();
- }
+ {
+ case LTU: return ARM_CS;
+ case GEU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ case CC_CZmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case GEU: return ARM_CS;
+ case GTU: return ARM_HI;
+ case LEU: return ARM_LS;
+ case LTU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ case CC_NCVmode:
+ switch (comp_code)
+ {
+ case GE: return ARM_GE;
+ case LT: return ARM_LT;
+ case GEU: return ARM_CS;
+ case LTU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
case CCmode:
switch (comp_code)
reversed if it appears to fail. */
int reverse = 0;
- /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
- taken are clobbered, even if the rtl suggests otherwise. It also
- means that we have to grub around within the jump expression to find
- out what the conditions are when the jump isn't taken. */
- int jump_clobbers = 0;
-
/* If we start with a return insn, we only succeed if we find another one. */
int seeking_return = 0;
int then_not_else = TRUE;
rtx this_insn = start_insn, label = 0;
- /* If the jump cannot be done with one instruction, we cannot
- conditionally execute the instruction in the inverse case. */
- if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
- {
- jump_clobbers = 1;
- return;
- }
-
/* Register the insn jumped to. */
if (reverse)
{
control falls in from somewhere else. */
if (this_insn == label)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
this_insn = next_nonnote_insn (this_insn);
if (this_insn && this_insn == label)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
if (this_insn && this_insn == label
&& insns_skipped < max_insns_skipped)
{
- if (jump_clobbers)
- {
- arm_ccfsm_state = 2;
- this_insn = next_nonnote_insn (this_insn);
- }
- else
- arm_ccfsm_state = 1;
+ arm_ccfsm_state = 1;
succeed = TRUE;
}
else
}
arm_target_insn = this_insn;
}
- if (jump_clobbers)
- {
- gcc_assert (!reverse);
- arm_current_cc =
- get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
- 0), 0), 1));
- if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
- if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
- }
- else
- {
- /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
- what it was. */
- if (!reverse)
- arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
- 0));
- }
+
+ /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
+ what it was. */
+ if (!reverse)
+ arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
if (reverse || then_not_else)
arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
if (mode == DFmode)
return VFP_REGNO_OK_FOR_DOUBLE (regno);
+ /* VFP registers can hold HFmode values, but there is no point in
+ putting them there unless we have hardware conversion insns. */
+ if (mode == HFmode)
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
+
if (TARGET_NEON)
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|| (VALID_NEON_QREG_MODE (mode)
return VALID_IWMMXT_REG_MODE (mode);
}
- /* We allow any value to be stored in the general registers.
+ /* We allow almost any value to be stored in the general registers.
Restrict doubleword quantities to even register pairs so that we can
- use ldrd. Do not allow Neon structure opaque modes in general registers;
- they would use too many. */
+ use ldrd. Do not allow very large Neon structure opaque modes in
+ general registers; they would use too many. */
if (regno <= LAST_ARM_REGNUM)
return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
- && !VALID_NEON_STRUCT_MODE (mode);
+ && ARM_NUM_REGS (mode) <= 4;
if (regno == FRAME_POINTER_REGNUM
|| regno == ARG_POINTER_REGNUM)
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
not used in arm mode. */
-int
+
+enum reg_class
arm_regno_class (int regno)
{
if (TARGET_THUMB1)
{
#define IWMMXT_BUILTIN(code, string, builtin) \
{ FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, 0, 0 },
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
#define IWMMXT_BUILTIN2(code, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
+ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
TREE_READONLY (decl) = 1;
}
-typedef enum {
+enum neon_builtin_type_bits {
T_V8QI = 0x0001,
T_V4HI = 0x0002,
T_V2SI = 0x0004,
T_TI = 0x0400,
T_EI = 0x0800,
T_OI = 0x1000
-} neon_builtin_type_bits;
+};
#define v8qi_UP T_V8QI
#define v4hi_UP T_V4HI
typedef struct {
const char *name;
const neon_itype itype;
- const neon_builtin_type_bits bits;
+ const int bits;
const enum insn_code codes[T_MAX];
const unsigned int num_vars;
unsigned int base_fcode;
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
layout_type (neon_float_type_node);
+ /* Define typedefs which exactly correspond to the modes we are basing vector
+ types on. If you change these names you'll need to change
+ the table used by arm_mangle_type too. */
+ (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
+ "__builtin_neon_qi");
+ (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
+ "__builtin_neon_hi");
+ (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
+ "__builtin_neon_si");
+ (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
+ "__builtin_neon_sf");
+ (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
+ "__builtin_neon_di");
+ (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
+ "__builtin_neon_poly8");
+ (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
+ "__builtin_neon_poly16");
+
intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
+ (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
+ "__builtin_neon_uqi");
+ (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
+ "__builtin_neon_uhi");
+ (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
+ "__builtin_neon_usi");
+ (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
+ "__builtin_neon_udi");
+
/* Opaque integer types for structures of vectors. */
intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
+ (*lang_hooks.types.register_builtin_type) (intTI_type_node,
+ "__builtin_neon_ti");
+ (*lang_hooks.types.register_builtin_type) (intEI_type_node,
+ "__builtin_neon_ei");
+ (*lang_hooks.types.register_builtin_type) (intOI_type_node,
+ "__builtin_neon_oi");
+ (*lang_hooks.types.register_builtin_type) (intCI_type_node,
+ "__builtin_neon_ci");
+ (*lang_hooks.types.register_builtin_type) (intXI_type_node,
+ "__builtin_neon_xi");
+
/* Pointers to vector types. */
V8QI_pointer_node = build_pointer_type (V8QI_type_node);
V4HI_pointer_node = build_pointer_type (V4HI_type_node);
build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
V2DI_type_node, NULL);
- /* Define typedefs which exactly correspond to the modes we are basing vector
- types on. If you change these names you'll need to change
- the table used by arm_mangle_type too. */
- (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
- "__builtin_neon_qi");
- (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
- "__builtin_neon_hi");
- (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
- "__builtin_neon_si");
- (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
- "__builtin_neon_sf");
- (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
- "__builtin_neon_di");
-
- (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
- "__builtin_neon_poly8");
- (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
- "__builtin_neon_poly16");
- (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
- "__builtin_neon_uqi");
- (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
- "__builtin_neon_uhi");
- (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
- "__builtin_neon_usi");
- (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
- "__builtin_neon_udi");
-
- (*lang_hooks.types.register_builtin_type) (intTI_type_node,
- "__builtin_neon_ti");
- (*lang_hooks.types.register_builtin_type) (intEI_type_node,
- "__builtin_neon_ei");
- (*lang_hooks.types.register_builtin_type) (intOI_type_node,
- "__builtin_neon_oi");
- (*lang_hooks.types.register_builtin_type) (intCI_type_node,
- "__builtin_neon_ci");
- (*lang_hooks.types.register_builtin_type) (intXI_type_node,
- "__builtin_neon_xi");
-
dreg_types[0] = V8QI_type_node;
dreg_types[1] = V4HI_type_node;
dreg_types[2] = V2SI_type_node;
}
static void
+arm_init_fp16_builtins (void)
+{
+ tree fp16_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (fp16_type) = 16;
+ layout_type (fp16_type);
+ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+}
+
+static void
arm_init_builtins (void)
{
arm_init_tls_builtins ();
if (TARGET_NEON)
arm_init_neon_builtins ();
+
+ if (arm_fp16_format)
+ arm_init_fp16_builtins ();
+}
+
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
+
+static const char *
+arm_invalid_parameter_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("function parameters cannot have __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
+
+static const char *
+arm_invalid_return_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("functions cannot return __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_PROMOTED_TYPE. */
+
+static tree
+arm_promoted_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return float_type_node;
+ return NULL_TREE;
+}
+
+/* Implement TARGET_CONVERT_TO_TYPE.
+ Specifically, this hook implements the peculiarity of the ARM
+ half-precision floating-point C semantics that requires conversions between
+ __fp16 to or from double to do an intermediate conversion to float. */
+
+static tree
+arm_convert_to_type (tree type, tree expr)
+{
+ tree fromtype = TREE_TYPE (expr);
+ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
+ return NULL_TREE;
+ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
+ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
+ return convert (type, convert (float_type_node, expr));
+ return NULL_TREE;
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
+ This simply adds HFmode as a supported mode; even though we don't
+ implement arithmetic on this type directly, it's supported by
+ optabs conversions, much the way the double-word arithmetic is
+ special-cased in the default hook. */
+
+static bool
+arm_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (mode == HFmode)
+ return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
+ else
+ return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
static int
neon_builtin_compare (const void *a, const void *b)
{
- const neon_builtin_datum *key = a;
- const neon_builtin_datum *memb = b;
+ const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
+ const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
unsigned int soughtcode = key->base_fcode;
if (soughtcode >= memb->base_fcode
int idx;
key.base_fcode = fcode;
- found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
+ found = (neon_builtin_datum *)
+ bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
sizeof (neon_builtin_data[0]), neon_builtin_compare);
gcc_assert (found);
idx = fcode - (int) found->base_fcode;
for (;;)
{
- builtin_arg thisarg = va_arg (ap, int);
+ builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
if (thisarg == NEON_ARG_STOP)
break;
return;
}
- if (ARM_EABI_UNWIND_TABLES && push)
+ if (push && arm_except_unwind_info () == UI_TARGET)
{
fprintf (f, "\t.save\t{");
for (regno = 0; regno < 15; regno++)
if (push && pushed_words && dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
int pushed_mask = real_regs;
*cfa_offset += pushed_words * 4;
/* Return to caller. */
asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
-
\f
+/* Scan INSN just before assembler is output for it.
+ For Thumb-1, we track the status of the condition codes; this
+ information is used in the cbranchsi4_insn pattern. */
void
thumb1_final_prescan_insn (rtx insn)
{
if (flag_print_asm_name)
asm_fprintf (asm_out_file, "%@ 0x%04x\n",
INSN_ADDRESSES (INSN_UID (insn)));
+ /* Don't overwrite the previous setter when we get to a cbranch. */
+ if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
+ {
+ enum attr_conds conds;
+
+ if (cfun->machine->thumb1_cc_insn)
+ {
+ if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
+ || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
+ CC_STATUS_INIT;
+ }
+ conds = get_attr_conds (insn);
+ if (conds == CONDS_SET)
+ {
+ rtx set = single_set (insn);
+ cfun->machine->thumb1_cc_insn = insn;
+ cfun->machine->thumb1_cc_op0 = SET_DEST (set);
+ cfun->machine->thumb1_cc_op1 = const0_rtx;
+ cfun->machine->thumb1_cc_mode = CC_NOOVmode;
+ if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
+ {
+ rtx src1 = XEXP (SET_SRC (set), 1);
+ if (src1 == const0_rtx)
+ cfun->machine->thumb1_cc_mode = CCmode;
+ }
+ }
+ else if (conds != CONDS_NOCOND)
+ cfun->machine->thumb1_cc_insn = NULL_RTX;
+ }
}
int
unsigned HOST_WIDE_INT mask = 0xff;
int i;
+ val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
if (val == 0) /* XXX */
return 0;
#endif
}
+/* Given the stack offsets and register mask in OFFSETS, decide how
+ many additional registers to push instead of subtracting a constant
+ from SP. For epilogues the principle is the same except we use pop.
+ FOR_PROLOGUE indicates which we're generating. */
+static int
+thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
+{
+ HOST_WIDE_INT amount;
+ unsigned long live_regs_mask = offsets->saved_regs_mask;
+ /* Extract a mask of the ones we can give to the Thumb's push/pop
+ instruction. */
+ unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
+ /* Then count how many other high registers will need to be pushed. */
+ unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
+ int n_free, reg_base;
+
+ if (!for_prologue && frame_pointer_needed)
+ amount = offsets->locals_base - offsets->saved_regs;
+ else
+ amount = offsets->outgoing_args - offsets->saved_regs;
+
+ /* If the stack frame size is 512 exactly, we can save one load
+ instruction, which should make this a win even when optimizing
+ for speed. */
+ if (!optimize_size && amount != 512)
+ return 0;
+
+ /* Can't do this if there are high registers to push. */
+ if (high_regs_pushed != 0)
+ return 0;
+
+ /* Shouldn't do it in the prologue if no registers would normally
+ be pushed at all. In the epilogue, also allow it if we'll have
+ a pop insn for the PC. */
+ if (l_mask == 0
+ && (for_prologue
+ || TARGET_BACKTRACE
+ || (live_regs_mask & 1 << LR_REGNUM) == 0
+ || TARGET_INTERWORK
+ || crtl->args.pretend_args_size != 0))
+ return 0;
+
+ /* Don't do this if thumb_expand_prologue wants to emit instructions
+ between the push and the stack frame allocation. */
+ if (for_prologue
+ && ((flag_pic && arm_pic_register != INVALID_REGNUM)
+ || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
+ return 0;
+
+ reg_base = 0;
+ n_free = 0;
+ if (!for_prologue)
+ {
+ reg_base = arm_size_return_regs () / UNITS_PER_WORD;
+ live_regs_mask >>= reg_base;
+ }
+
+ while (reg_base + n_free < 8 && !(live_regs_mask & 1)
+ && (for_prologue || call_used_regs[reg_base + n_free]))
+ {
+ live_regs_mask >>= 1;
+ n_free++;
+ }
+
+ if (n_free == 0)
+ return 0;
+ gcc_assert (amount / 4 * 4 == amount);
+
+ if (amount >= 512 && (amount - n_free * 4) < 512)
+ return (amount - 508) / 4;
+ if (amount <= n_free * 4)
+ return amount / 4;
+ return 0;
+}
+
/* The bits which aren't usefully expanded as rtl. */
const char *
thumb_unexpanded_epilogue (void)
int regno;
unsigned long live_regs_mask = 0;
int high_regs_pushed = 0;
+ int extra_pop;
int had_to_push_lr;
int size;
- if (return_used_this_function)
+ if (cfun->machine->return_used_this_function != 0)
return "";
if (IS_NAKED (arm_current_func_type ()))
the register is used to hold a return value. */
size = arm_size_return_regs ();
+ extra_pop = thumb1_extra_regs_pushed (offsets, false);
+ if (extra_pop > 0)
+ {
+ unsigned long extra_mask = (1 << extra_pop) - 1;
+ live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
+ }
+
/* The prolog may have pushed some high registers to use as
work registers. e.g. the testsuite file:
gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
live_regs_mask);
/* We have either just popped the return address into the
- PC or it is was kept in LR for the entire function. */
+ PC or it is was kept in LR for the entire function.
+ Note that thumb_pushpop has already called thumb_exit if the
+ PC was in the list. */
if (!had_to_push_lr)
thumb_exit (asm_out_file, LR_REGNUM);
}
arm_init_machine_status (void)
{
struct machine_function *machine;
- machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
+ machine = ggc_alloc_cleared_machine_function ();
#if ARM_FT_UNKNOWN != 0
machine->func_type = ARM_FT_UNKNOWN;
stack_pointer_rtx);
amount = offsets->outgoing_args - offsets->saved_regs;
+ amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
if (amount)
{
if (amount < 512)
been pushed at the start of the prologue and so we can corrupt
it now. */
for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
- if (live_regs_mask & (1 << regno)
- && !(frame_pointer_needed
- && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
+ if (live_regs_mask & (1 << regno))
break;
- if (regno > LAST_LO_REGNUM) /* Very unlikely. */
- {
- rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
-
- /* Choose an arbitrary, non-argument low register. */
- reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
-
- /* Save it by copying it into a high, scratch register. */
- emit_insn (gen_movsi (spare, reg));
- /* Add a USE to stop propagate_one_insn() from barfing. */
- emit_insn (gen_prologue_use (spare));
+ gcc_assert(regno <= LAST_LO_REGNUM);
- /* Decrement the stack. */
- emit_insn (gen_movsi (reg, GEN_INT (- amount)));
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx, reg));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- plus_constant (stack_pointer_rtx,
- -amount));
- RTX_FRAME_RELATED_P (dwarf) = 1;
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (insn));
-
- /* Restore the low register's original value. */
- emit_insn (gen_movsi (reg, spare));
-
- /* Emit a USE of the restored scratch register, so that flow
- analysis will not consider the restore redundant. The
- register won't be used again in this function and isn't
- restored by the epilogue. */
- emit_insn (gen_prologue_use (reg));
- }
- else
- {
- reg = gen_rtx_REG (SImode, regno);
+ reg = gen_rtx_REG (SImode, regno);
- emit_insn (gen_movsi (reg, GEN_INT (- amount)));
+ emit_insn (gen_movsi (reg, GEN_INT (- amount)));
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx, reg));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- plus_constant (stack_pointer_rtx,
- -amount));
- RTX_FRAME_RELATED_P (dwarf) = 1;
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
- REG_NOTES (insn));
- }
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -amount));
+ RTX_FRAME_RELATED_P (dwarf) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
}
using the EABI unwinder, to prevent faulting instructions from being
swapped with a stack adjustment. */
if (crtl->profile || !TARGET_SCHED_PROLOG
- || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
+ || (arm_except_unwind_info () == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
emit_insn (gen_blockage ());
cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
amount = offsets->locals_base - offsets->saved_regs;
}
+ amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
gcc_assert (amount >= 0);
if (amount)
if (crtl->args.pretend_args_size)
{
/* Output unwind directive for the stack adjustment. */
- if (ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info () == UI_TARGET)
fprintf (f, "\t.pad #%d\n",
crtl->args.pretend_args_size);
the stack pointer. */
if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
cfa_offset = cfa_offset + crtl->args.pretend_args_size;
dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
work_register = thumb_find_work_register (live_regs_mask);
- if (ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info () == UI_TARGET)
asm_fprintf (f, "\t.pad #16\n");
asm_fprintf
if (dwarf2out_do_frame ())
{
- char *l = dwarf2out_cfi_label ();
+ char *l = dwarf2out_cfi_label (false);
cfa_offset = cfa_offset + 16;
dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
register. */
else if ((l_mask & 0xff) != 0
|| (high_regs_pushed == 0 && l_mask))
- thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
+ {
+ unsigned long mask = l_mask;
+ mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
+ thumb_pushpop (f, mask, 1, &cfa_offset, mask);
+ }
if (high_regs_pushed)
{
if (TARGET_BPABI)
{
const char *fpu_name;
- if (arm_select[0].string)
- asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
- else if (arm_select[1].string)
- asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
+ if (arm_selected_arch)
+ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
else
- asm_fprintf (asm_out_file, "\t.cpu %s\n",
- all_cores[arm_default_cpu].name);
+ asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
if (TARGET_SOFT_FLOAT)
{
}
else
{
- int set_float_abi_attributes = 0;
- switch (arm_fpu_arch)
- {
- case FPUTYPE_FPA:
- fpu_name = "fpa";
- break;
- case FPUTYPE_FPA_EMU2:
- fpu_name = "fpe2";
- break;
- case FPUTYPE_FPA_EMU3:
- fpu_name = "fpe3";
- break;
- case FPUTYPE_MAVERICK:
- fpu_name = "maverick";
- break;
- case FPUTYPE_VFP:
- fpu_name = "vfp";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3:
- fpu_name = "vfp3";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON:
- fpu_name = "neon";
- set_float_abi_attributes = 1;
- break;
- default:
- abort();
- }
- if (set_float_abi_attributes)
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
if (TARGET_HARD_FLOAT)
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_ABI_FP_16bit_format. */
+ if (arm_fp16_format)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
+ (int)arm_fp16_format);
+
if (arm_lang_output_object_attributes_hook)
arm_lang_output_object_attributes_hook();
}
return 1;
}
+/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
+ HFmode constant pool entries are actually loaded with ldr. */
+void
+arm_emit_fp16_const (rtx c)
+{
+ REAL_VALUE_TYPE r;
+ long bits;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c);
+ bits = real_to_target (NULL, &r, HFmode);
+ if (WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
+ if (!WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+}
+
const char *
arm_output_load_gr (rtx *operands)
{
that way. */
static void
-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
enum machine_mode mode,
tree type,
int *pretend_size,
int second_time ATTRIBUTE_UNUSED)
{
- int nregs = cum->nregs;
- if (nregs & 1
- && ARM_DOUBLEWORD_ALIGN
- && arm_needs_doubleword_align (mode, type))
- nregs++;
-
+ int nregs;
+
cfun->machine->uses_anonymous_args = 1;
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ nregs = pcum->aapcs_ncrn;
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
+ nregs++;
+ }
+ else
+ nregs = pcum->nregs;
+
if (nregs < NUM_ARG_REGS)
*pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
return !reg_overlap_mentioned_p (value, addr);
}
+/* Return nonzero if the CONSUMER instruction (a store) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_store_addr_dep (rtx producer, rtx consumer)
+{
+ return !arm_no_early_store_addr_dep (producer, consumer);
+}
+
+/* Return nonzero if the CONSUMER instruction (a load) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_load_addr_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx addr = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = COND_EXEC_CODE (addr);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 1);
+
+ return reg_overlap_mentioned_p (value, addr);
+}
+
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
have an early register shift value or amount dependency on the
result of PRODUCER. */
op = XVECEXP (op, 0, 0);
op = XEXP (op, 1);
- return (GET_CODE (op) == PLUS
- && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
+ if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
+ {
+ if (GET_CODE (XEXP (op, 0)) == MULT)
+ return !reg_overlap_mentioned_p (value, XEXP (op, 0));
+ else
+ return !reg_overlap_mentioned_p (value, XEXP (op, 1));
+ }
+
+ return 0;
}
/* We can't rely on the caller doing the proper promotion when
return !TARGET_AAPCS_BASED;
}
+static enum machine_mode
+arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ return SImode;
+
+ return mode;
+}
/* AAPCS based ABIs use short enums by default. */
static void
arm_cxx_determine_class_data_visibility (tree decl)
{
- if (!TARGET_AAPCS_BASED)
+ if (!TARGET_AAPCS_BASED
+ || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
return;
/* In general, \S 3.2.5.5 of the ARM EABI requires that class data
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode))
return true;
- if ((mode == V2SImode)
- || (mode == V4HImode)
- || (mode == V8QImode))
+ if ((TARGET_NEON || TARGET_IWMMXT)
+ && ((mode == V2SImode)
+ || (mode == V4HImode)
+ || (mode == V8QImode)))
+ return true;
+
+ return false;
+}
+
+/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+ registers when autovectorizing for Neon, at least until multiple vector
+ widths are supported properly by the middle-end. */
+
+static enum machine_mode
+arm_preferred_simd_mode (enum machine_mode mode)
+{
+ if (TARGET_NEON)
+ switch (mode)
+ {
+ case SFmode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ case SImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ case HImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ case QImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ case DImode:
+ if (TARGET_NEON_VECTORIZE_QUAD)
+ return V2DImode;
+ break;
+
+ default:;
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ switch (mode)
+ {
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+
+ default:;
+ }
+
+ return word_mode;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
+
+ We need to define this for LO_REGS on thumb. Otherwise we can end up
+ using r0-r4 for function arguments, r7 for the stack frame and don't
+ have enough left over to do doubleword arithmetic. */
+
+static bool
+arm_class_likely_spilled_p (reg_class_t rclass)
+{
+ if ((TARGET_THUMB && rclass == LO_REGS)
+ || rclass == CC_REG)
return true;
return false;
}
+/* Implements target hook small_register_classes_for_mode_p. */
+bool
+arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return TARGET_THUMB1;
+}
+
/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
ARM insns and therefore guarantee that the shift count is modulo 256.
DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
if (IS_FPA_REGNUM (regno))
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
- /* FIXME: VFPv3 register numbering. */
if (IS_VFP_REGNUM (regno))
- return 64 + regno - FIRST_VFP_REGNUM;
+ {
+ /* See comment in arm_dwarf_register_span. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return 64 + regno - FIRST_VFP_REGNUM;
+ else
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2;
+ }
if (IS_IWMMXT_GR_REGNUM (regno))
return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
gcc_unreachable ();
}
+/* Dwarf models VFPv3 registers as 32 64-bit registers.
+ GCC models tham as 64 32-bit registers, so we need to describe this to
+ the DWARF generation code. Other registers can use the default. */
+static rtx
+arm_dwarf_register_span (rtx rtl)
+{
+ unsigned regno;
+ int nregs;
+ int i;
+ rtx p;
+
+ regno = REGNO (rtl);
+ if (!IS_VFP_REGNUM (regno))
+ return NULL_RTX;
+
+ /* XXX FIXME: The EABI defines two VFP register ranges:
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
+ 256-287: D0-D31
+ The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
+ corresponding D register. Until GDB supports this, we shall use the
+ legacy encodings. We also use these encodings for D0-D15 for
+ compatibility with older debuggers. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return NULL_RTX;
+
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
+ regno = (regno - FIRST_VFP_REGNUM) / 2;
+ for (i = 0; i < nregs; i++)
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+
+ return p;
+}
-#ifdef TARGET_UNWIND_INFO
+#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
push during alignment.
These should only ever be generated by the function prologue code, so
offset = INTVAL (XEXP (e1, 1));
asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
HARD_FRAME_POINTER_REGNUM, reg,
- INTVAL (XEXP (e1, 1)));
+ offset);
}
else if (GET_CODE (e1) == REG)
{
{
rtx pat;
- if (!ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info () != UI_TARGET)
return;
if (!(flag_unwind_tables || crtl->uses_eh_lsda)
return TRUE;
}
-#endif /* TARGET_UNWIND_INFO */
+
+/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
+
+static void
+arm_asm_emit_except_personality (rtx personality)
+{
+ fputs ("\t.personality\t", asm_out_file);
+ output_addr_const (asm_out_file, personality);
+ fputc ('\n', asm_out_file);
+}
+
+/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
+
+static void
+arm_asm_init_sections (void)
+{
+ exception_section = get_unnamed_section (0, output_section_asm_op,
+ "\t.handlerdata");
+}
+#endif /* ARM_UNWIND_INFO */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO. */
+
+static enum unwind_info_type
+arm_except_unwind_info (void)
+{
+ /* Honor the --enable-sjlj-exceptions configure switch. */
+#ifdef CONFIG_SJLJ_EXCEPTIONS
+ if (CONFIG_SJLJ_EXCEPTIONS)
+ return UI_SJLJ;
+#endif
+
+ /* If not using ARM EABI unwind tables... */
+ if (ARM_UNWIND_INFO)
+ {
+ /* For simplicity elsewhere in this file, indicate that all unwind
+ info is disabled if we're not emitting unwind tables. */
+ if (!flag_exceptions && !flag_unwind_tables)
+ return UI_NONE;
+ else
+ return UI_TARGET;
+ }
+
+ /* ... we use sjlj exceptions for backwards compatibility. */
+ return UI_SJLJ;
+}
/* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
void
arm_output_fn_unwind (FILE * f, bool prologue)
{
- if (!ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info () != UI_TARGET)
return;
if (prologue)
rtx val;
val = XVECEXP (x, 0, 0);
- reloc = INTVAL (XVECEXP (x, 0, 1));
+ reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
output_addr_const (fp, val);
fputs ("(tlsldo)", file);
}
-bool
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
return TRUE;
}
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
+ {
+ assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
+ if (GOT_PCREL)
+ fputs ("+.", fp);
+ fputs ("-(", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputc (')', fp);
+ return TRUE;
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
+ {
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ if (GOT_PCREL)
+ fputs ("+.", fp);
+ fputs ("-(", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 1));
+ fputc (')', fp);
+ return TRUE;
+ }
else if (GET_CODE (x) == CONST_VECTOR)
return arm_emit_vector_const (fp, x);
return "";
}
+/* Output a Thumb-1 casesi dispatch sequence. */
+const char *
+thumb1_output_casesi (rtx *operands)
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[0]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE(diff_vec))
+ {
+ case QImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
+ case HImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
+ case SImode:
+ return "bl\t%___gnu_thumb1_case_si";
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Output a Thumb-2 casesi instruction. */
const char *
thumb2_output_casesi (rtx *operands)
switch (arm_tune)
{
case cortexr4:
+ case cortexr4f:
+ case cortexa5:
case cortexa8:
+ case cortexa9:
return 2;
default:
{
arm_mangle_map_entry *pos = arm_mangle_map;
+ /* The ARM ABI documents (10th October 2008) say that "__va_list"
+ has to be managled as if it is in the "std" namespace. */
+ if (TARGET_AAPCS_BASED
+ && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
+ {
+ static bool warned;
+ if (!warned && warn_psabi && !in_system_header)
+ {
+ warned = true;
+ inform (input_location,
+ "the mangling of %<va_list%> has changed in GCC 4.4");
+ }
+ return "St9__va_list";
+ }
+
+ /* Half-precision float. */
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ return "Dh";
+
if (TREE_CODE (type) != VECTOR_TYPE)
return NULL;
return NULL;
}
+/* Order of allocation of core registers for Thumb: this allocation is
+ written over the corresponding initial entries of the array
+ initialized with REG_ALLOC_ORDER. We allocate all low registers
+ first. Saving and restoring a low register is usually cheaper than
+ using a call-clobbered high register. */
+
+static const int thumb_core_reg_alloc_order[] =
+{
+ 3, 2, 1, 0, 4, 5, 6, 7,
+ 14, 12, 8, 9, 10, 11, 13, 15
+};
+
+/* Adjust register allocation order when compiling for Thumb. */
+
+void
+arm_order_regs_for_local_alloc (void)
+{
+ const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
+ memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
+ if (TARGET_THUMB)
+ memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
+ sizeof (thumb_core_reg_alloc_order));
+}
+
+/* Set default optimization options. */
+static void
+arm_option_optimization (int level, int size ATTRIBUTE_UNUSED)
+{
+ /* Enable section anchors by default at -O1 or higher.
+ Use 2 to distinguish from an explicit -fsection-anchors
+ given on the command line. */
+ if (level > 0)
+ flag_section_anchors = 2;
+}
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+bool
+arm_frame_pointer_required (void)
+{
+ return (cfun->has_nonlocal_label
+ || SUBTARGET_FRAME_POINTER_REQUIRED
+ || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
+}
+
+/* Only thumb1 can't support conditional execution, so return true if
+ the target is not thumb1. */
+static bool
+arm_have_conditional_execution (void)
+{
+ return !TARGET_THUMB1;
+}
+
+/* Legitimize a memory reference for sync primitive implemented using
+ ldrex / strex. We currently force the form of the reference to be
+ indirect without offset. We do not yet support the indirect offset
+ addressing supported by some ARM targets for these
+ instructions. */
+static rtx
+arm_legitimize_sync_memory (rtx memory)
+{
+ rtx addr = force_reg (Pmode, XEXP (memory, 0));
+ rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
+
+ set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
+ return legitimate_memory;
+}
+
+/* An instruction emitter. */
+typedef void (* emit_f) (int label, const char *, rtx *);
+
+/* An instruction emitter that emits via the conventional
+ output_asm_insn. */
+static void
+arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
+{
+ output_asm_insn (pattern, operands);
+}
+
+/* Count the number of emitted synchronization instructions. */
+static unsigned arm_insn_count;
+
+/* An emitter that counts emitted instructions but does not actually
+ emit instruction into the the instruction stream. */
+static void
+arm_count (int label,
+ const char *pattern ATTRIBUTE_UNUSED,
+ rtx *operands ATTRIBUTE_UNUSED)
+{
+ if (! label)
+ ++ arm_insn_count;
+}
+
+/* Construct a pattern using conventional output formatting and feed
+ it to output_asm_insn. Provides a mechanism to construct the
+ output pattern on the fly. Note the hard limit on the pattern
+ buffer size. */
+static void
+arm_output_asm_insn (emit_f emit, int label, rtx *operands,
+ const char *pattern, ...)
+{
+ va_list ap;
+ char buffer[256];
+
+ va_start (ap, pattern);
+ vsprintf (buffer, pattern, ap);
+ va_end (ap);
+ emit (label, buffer, operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target to a specified emitter. */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+ if (TARGET_HAVE_DMB)
+ {
+ /* Note we issue a system level barrier. We should consider
+ issuing a inner shareabilty zone barrier here instead, ie.
+ "DMB ISH". */
+ emit (0, "dmb\tsy", operands);
+ return;
+ }
+
+ if (TARGET_HAVE_DMB_MCR)
+ {
+ emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+ return;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target. */
+const char *
+arm_output_memory_barrier (rtx *operands)
+{
+ arm_process_output_memory_barrier (arm_emit, operands);
+ return "";
+}
+
+/* Helper to figure out the instruction suffix required on ldrex/strex
+ for operations on an object of the specified mode. */
+static const char *
+arm_ldrex_suffix (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode: return "b";
+ case HImode: return "h";
+ case SImode: return "";
+ case DImode: return "d";
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
+
+/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_ldrex (emit_f emit,
+ enum machine_mode mode,
+ rtx target,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[2];
+
+ operands[0] = target;
+ operands[1] = memory;
+ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+}
+
+/* Emit a strex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_strex (emit_f emit,
+ enum machine_mode mode,
+ const char *cc,
+ rtx result,
+ rtx value,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[3];
+
+ operands[0] = result;
+ operands[1] = value;
+ operands[2] = memory;
+ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
+ cc);
+}
+
+/* Helper to emit a two operand instruction. */
+static void
+arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
+{
+ rtx operands[2];
+
+ operands[0] = d;
+ operands[1] = s;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
+}
+
+/* Helper to emit a three operand instruction. */
+static void
+arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
+{
+ rtx operands[3];
+
+ operands[0] = d;
+ operands[1] = a;
+ operands[2] = b;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
+}
+
+/* Emit a load store exclusive synchronization loop.
+
+ do
+ old_value = [mem]
+ if old_value != required_value
+ break;
+ t1 = sync_op (old_value, new_value)
+ [mem] = t1, t2 = [0|1]
+ while ! t2
+
+ Note:
+ t1 == t2 is not permitted
+ t1 == old_value is permitted
+
+ required_value:
+
+ RTX register or const_int representing the required old_value for
+ the modify to continue, if NULL no comparsion is performed. */
+static void
+arm_output_sync_loop (emit_f emit,
+ enum machine_mode mode,
+ rtx old_value,
+ rtx memory,
+ rtx required_value,
+ rtx new_value,
+ rtx t1,
+ rtx t2,
+ enum attr_sync_op sync_op,
+ int early_barrier_required)
+{
+ rtx operands[1];
+
+ gcc_assert (t1 != t2);
+
+ if (early_barrier_required)
+ arm_process_output_memory_barrier (emit, NULL);
+
+ arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
+
+ arm_output_ldrex (emit, mode, old_value, memory);
+
+ if (required_value)
+ {
+ rtx operands[2];
+
+ operands[0] = old_value;
+ operands[1] = required_value;
+ arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
+ }
+
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "add", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "sub", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_IOR:
+ arm_output_op3 (emit, "orr", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_AND:
+ arm_output_op3 (emit,"and", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_NAND:
+ arm_output_op3 (emit, "and", t1, old_value, new_value);
+ arm_output_op2 (emit, "mvn", t1, t1);
+ break;
+
+ case SYNC_OP_NONE:
+ t1 = new_value;
+ break;
+ }
+
+ arm_output_strex (emit, mode, "", t2, t1, memory);
+ operands[0] = t2;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+
+ arm_process_output_memory_barrier (emit, NULL);
+ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
+}
+
+static rtx
+arm_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
+ if (index > 0)
+ default_value = operands[index - 1];
+
+ return default_value;
+}
+
+#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
+ arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
+
+/* Extract the operands for a synchroniztion instruction from the
+ instructions attributes and emit the instruction. */
+static void
+arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
+{
+ rtx result, memory, required_value, new_value, t1, t2;
+ int early_barrier;
+ enum machine_mode mode;
+ enum attr_sync_op sync_op;
+
+ result = FETCH_SYNC_OPERAND(result, 0);
+ memory = FETCH_SYNC_OPERAND(memory, 0);
+ required_value = FETCH_SYNC_OPERAND(required_value, 0);
+ new_value = FETCH_SYNC_OPERAND(new_value, 0);
+ t1 = FETCH_SYNC_OPERAND(t1, 0);
+ t2 = FETCH_SYNC_OPERAND(t2, 0);
+ early_barrier =
+ get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
+ sync_op = get_attr_sync_op (insn);
+ mode = GET_MODE (memory);
+
+ arm_output_sync_loop (emit, mode, result, memory, required_value,
+ new_value, t1, t2, sync_op, early_barrier);
+}
+
+/* Emit a synchronization instruction loop. */
+const char *
+arm_output_sync_insn (rtx insn, rtx *operands)
+{
+ arm_process_output_sync_insn (arm_emit, insn, operands);
+ return "";
+}
+
+/* Count the number of machine instruction that will be emitted for a
+ synchronization instruction. Note that the emitter used does not
+ emit instructions, it just counts instructions being carefull not
+ to count labels. */
+unsigned int
+arm_sync_loop_insns (rtx insn, rtx *operands)
+{
+ arm_insn_count = 0;
+ arm_process_output_sync_insn (arm_count, insn, operands);
+ return arm_insn_count;
+}
+
+/* Helper to call a target sync instruction generator, dealing with
+ the variation in operands required by the different generators. */
+static rtx
+arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
+ rtx memory, rtx required_value, rtx new_value)
+{
+ switch (generator->op)
+ {
+ case arm_sync_generator_omn:
+ gcc_assert (! required_value);
+ return generator->u.omn (old_value, memory, new_value);
+
+ case arm_sync_generator_omrn:
+ gcc_assert (required_value);
+ return generator->u.omrn (old_value, memory, required_value, new_value);
+ }
+
+ return NULL;
+}
+
+/* Expand a synchronization loop. The synchronization loop is expanded
+ as an opaque block of instructions in order to ensure that we do
+ not subsequently get extraneous memory accesses inserted within the
+ critical region. The exclusive access property of ldrex/strex is
+ only guaranteed in there are no intervening memory accesses. */
+void
+arm_expand_sync (enum machine_mode mode,
+ struct arm_sync_generator *generator,
+ rtx target, rtx memory, rtx required_value, rtx new_value)
+{
+ if (target == NULL)
+ target = gen_reg_rtx (mode);
+
+ memory = arm_legitimize_sync_memory (memory);
+ if (mode != SImode)
+ {
+ rtx load_temp = gen_reg_rtx (SImode);
+
+ if (required_value)
+ required_value = convert_modes (SImode, mode, required_value, true);
+
+ new_value = convert_modes (SImode, mode, new_value, true);
+ emit_insn (arm_call_generator (generator, load_temp, memory,
+ required_value, new_value));
+ emit_move_insn (target, gen_lowpart (mode, load_temp));
+ }
+ else
+ {
+ emit_insn (arm_call_generator (generator, target, memory, required_value,
+ new_value));
+ }
+}
+
+static bool
+arm_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+ /* Vectors which aren't in packed structures will not be less aligned than
+ the natural alignment of their element type, so this is safe. */
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ return !is_packed;
+
+ return default_builtin_vector_alignment_reachable (type, is_packed);
+}
+
+static bool
+arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type, int misalignment,
+ bool is_packed)
+{
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
+
+ if (is_packed)
+ return align == 1;
+
+ /* If the misalignment is unknown, we should be able to handle the access
+ so long as it is not to a member of a packed data structure. */
+ if (misalignment == -1)
+ return true;
+
+ /* Return true if the misalignment is a multiple of the natural alignment
+ of the vector's element type. This is probably always going to be
+ true in practice, since we've already established that this isn't a
+ packed access. */
+ return ((misalignment % align) == 0);
+ }
+
+ return default_builtin_support_vector_misalignment (mode, type, misalignment,
+ is_packed);
+}
+
#include "gt-arm.h"