#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
-#include "toplev.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations. */
+static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
+static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
-#ifdef TARGET_UNWIND_INFO
+#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
+static void arm_asm_emit_except_personality (rtx);
+static void arm_asm_init_sections (void);
#endif
+static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static void arm_option_override (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
+static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
+static bool arm_class_likely_spilled_p (reg_class_t);
+static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
+static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type,
+ int misalignment,
+ bool is_packed);
+static void arm_conditional_register_usage (void);
+static reg_class_t arm_preferred_rename_class (reg_class_t class);
\f
/* Table of machine attributes. */
#endif
{ NULL, 0, 0, false, false, false, NULL }
};
+
+/* Set default optimization options. */
+static const struct default_options arm_option_optimization_table[] =
+ {
+ /* Enable section anchors by default at -O1 or higher. */
+ { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
\f
/* Initialize the GCC target structure. */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
+
#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arm_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arm_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
-#ifdef TARGET_UNWIND_INFO
+#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
-#endif /* TARGET_UNWIND_INFO */
+
+#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
+#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
+
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
+#endif /* ARM_UNWIND_INFO */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+ arm_vector_alignment_reachable
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+ arm_builtin_support_vector_misalignment
+
+#undef TARGET_PREFERRED_RENAME_CLASS
+#define TARGET_PREFERRED_RENAME_CLASS \
+ arm_preferred_rename_class
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Obstack for minipool constant handling. */
#define FL_NEON (1 << 20) /* Neon instructions. */
#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
architecture. */
+#define FL_ARCH7 (1 << 22) /* Architecture 7. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
+#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
/* Nonzero if this chip supports the ARM 6K extensions. */
int arm_arch6k = 0;
+/* Nonzero if this chip supports the ARM 7 extensions. */
+int arm_arch7 = 0;
+
/* Nonzero if instructions not present in the 'M' profile can be used. */
int arm_arch_notm = 0;
const struct tune_params *const tune;
};
+
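+/* Prefetch tuning parameters for a core, recorded in its tune_params: the
+ number of simultaneous prefetch slots, the L1 cache size and the L1 cache
+ line size. Zero slots and negative cache values indicate that software
+ prefetching has not been found beneficial for that core. */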
+#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
+#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
+ prefetch_slots, \
+ l1_size, \
+ l1_line_size
+
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL,
- 3
+ 3,
+ ARM_PREFETCH_NOT_BENEFICIAL
};
const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
- 1
+ 1,
+ ARM_PREFETCH_NOT_BENEFICIAL
};
const struct tune_params arm_xscale_tune =
{
arm_xscale_rtx_costs,
xscale_sched_adjust_cost,
- 2
+ 2,
+ ARM_PREFETCH_NOT_BENEFICIAL
};
const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
- 1
+ 1,
+ ARM_PREFETCH_NOT_BENEFICIAL
};
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
cortex_a9_sched_adjust_cost,
- 1
+ 1,
+ ARM_PREFETCH_BENEFICIAL(4,32,32)
};
va_list_type);
DECL_ARTIFICIAL (va_list_name) = 1;
TYPE_NAME (va_list_type) = va_list_name;
+ TYPE_STUB_DECL (va_list_type) = va_list_name;
/* Create the __ap field. */
ap_field = build_decl (BUILTINS_LOCATION,
FIELD_DECL,
{
const char *p;
- GET_ENVIRONMENT (p, "COLUMNS");
+ p = getenv ("COLUMNS");
if (p != NULL)
{
int value = atoi (p);
}
-/* Fix up any incompatible options that the user has specified.
- This has now turned into a maze. */
-void
-arm_override_options (void)
+/* Fix up any incompatible options that the user has specified. */
+static void
+arm_option_override (void)
{
unsigned i;
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
if (arm_selected_arch)
{
if (arm_selected_cpu)
arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
/* Default to ARM6. */
- if (arm_selected_cpu->name)
+ if (!arm_selected_cpu->name)
arm_selected_cpu = &all_cores[arm6];
}
/* Callee super interworking implies thumb interworking. Adding
this to the flags here simplifies the logic elsewhere. */
if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
- target_flags |= MASK_INTERWORK;
+ target_flags |= MASK_INTERWORK;
/* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
from here where no function is being compiled currently. */
if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
- if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
- warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
-
if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
{
warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7 = (insn_flags & FL_ARCH7) != 0;
arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
flag_reorder_blocks = 1;
}
- if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
- && flag_pic)
+ if (flag_pic)
/* Hoisting PIC address calculations more aggressively provides a small,
but measurable, size reduction for PIC code. Therefore, we decrease
the bar for unrestricted expression hoisting to the cost of PIC address
calculation, which is 2 instructions. */
- set_param_value ("gcse-unrestricted-cost", 2);
+ maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* ARM EABI defaults to strict volatile bitfields. */
+ if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
+ flag_strict_volatile_bitfields = 1;
+
+ /* Enable software prefetching at -O3 for CPUs that have prefetch, and for
+ which we have deemed it beneficial (signified by setting num_prefetch_slots
+ to 1 or more). */
+ if (flag_prefetch_loop_arrays < 0
+ && HAVE_prefetch
+ && optimize >= 3
+ && current_tune->num_prefetch_slots > 0)
+ flag_prefetch_loop_arrays = 1;
+
+ /* Set up parameters to be used in the prefetching algorithm. Do not
+ override the defaults unless we are tuning for a core whose values we
+ have researched. */
+ if (current_tune->num_prefetch_slots > 0)
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ current_tune->num_prefetch_slots,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ if (current_tune->l1_cache_line_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+ current_tune->l1_cache_line_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ if (current_tune->l1_cache_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+ current_tune->l1_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
if (optimize > 0
&& (TREE_NOTHROW (current_function_decl)
|| !(flag_unwind_tables
- || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
+ || (flag_exceptions
+ && arm_except_unwind_info (&global_options) != UI_SJLJ)))
&& TREE_THIS_VOLATILE (current_function_decl))
type |= ARM_FT_VOLATILE;
{
HOST_WIDE_INT v;
- /* Allow repeated pattern. */
+ /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
v = i & 0xff;
v |= v << 16;
if (i == v || i == (v | (v << 8)))
return TRUE;
+
+ /* Allow repeated pattern 0xXY00XY00. */
+ v = i & 0xff00;
+ v |= v << 16;
+ if (i == v)
+ return TRUE;
}
return FALSE;
if (user_convention)
{
if (user_pcs > ARM_PCS_AAPCS_LOCAL)
- sorry ("Non-AAPCS derived PCS variant");
+ sorry ("non-AAPCS derived PCS variant");
else if (base_rules && user_pcs != ARM_PCS_AAPCS)
- error ("Variadic functions must use the base AAPCS variant");
+ error ("variadic functions must use the base AAPCS variant");
}
if (base_rules)
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type)
+ const_tree type)
{
int i;
numbers referred to here are those in the AAPCS. */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, int named)
+ const_tree type, bool named)
{
int nregs, nregs2;
int ncrn;
/* Return true if mode/type need doubleword alignment. */
-bool
-arm_needs_doubleword_align (enum machine_mode mode, tree type)
+static bool
+arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
|| (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
CUM is a variable of type CUMULATIVE_ARGS which gives info about
the preceding args and about the function being called.
NAMED is nonzero if this argument is a named parameter
- (otherwise it is an extra parameter matching an ellipsis). */
+ (otherwise it is an extra parameter matching an ellipsis).
-rtx
+ On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
+ other arguments are passed on the stack. If (NAMED == 0) (which happens
+ only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
+ defined), say it is passed in the stack (function_prologue will
+ indeed make it pass in the stack if necessary). */
+
+static rtx
arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, int named)
+ const_tree type, bool named)
{
int nregs;
return gen_rtx_REG (mode, pcum->nregs);
}
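+/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Return DOUBLEWORD_ALIGNMENT when
+ the argument needs doubleword alignment under the AAPCS rules, otherwise
+ the default PARM_BOUNDARY. */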
+static unsigned int
+arm_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
+ ? DOUBLEWORD_ALIGNMENT
+ : PARM_BOUNDARY);
+}
+
static int
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
tree type, bool named)
return 0;
}
-void
+/* Update the data in PCUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, bool named)
+ const_tree type, bool named)
{
if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
{
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
- if (TARGET_NEON
- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ /* For quad modes, we restrict the constant offset to be slightly less
+ than what the instruction format permits. We do this because for
+ quad mode moves, we will actually decompose them into two separate
+ double-mode reads or writes. INDEX must therefore be a valid
+ (double-mode) offset and so should INDEX+8. */
+ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
return (code == CONST_INT
&& INTVAL (index) < 1016
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
+ /* We have no such constraint on double mode offsets, so we permit the
+ full range of the instruction format. */
+ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
return (code == CONST_INT
&& INTVAL (index) < 1024
&& (INTVAL (index) & 3) == 0);
}
- if (TARGET_NEON
- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ /* For quad modes, we restrict the constant offset to be slightly less
+ than what the instruction format permits. We do this because for
+ quad mode moves, we will actually decompose them into two separate
+ double-mode reads or writes. INDEX must therefore be a valid
+ (double-mode) offset and so should INDEX+8. */
+ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
return (code == CONST_INT
&& INTVAL (index) < 1016
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
+ /* We have no such constraint on double mode offsets, so we permit the
+ full range of the instruction format. */
+ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
if (arm_address_register_rtx_p (index, strict_p)
&& (GET_MODE_SIZE (mode) <= 4))
return 1;
&& (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
|| REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
|| (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
- && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
+ && REGNO (XEXP (x, 0))
+ <= LAST_VIRTUAL_POINTER_REGISTER))
&& GET_MODE_SIZE (mode) >= 4
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& (INTVAL (XEXP (x, 1)) & 3) == 0)
return arm_address_register_rtx_p (ind, 0);
/* Allow post-increment with Neon registers. */
- if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
+ if ((type != 1 && GET_CODE (ind) == POST_INC)
+ || (type == 0 && GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* FIXME: vld1 allows register post-modify. */
FOR_EACH_BB (bb)
{
rtx insn;
+
COPY_REG_SET (&live, DF_LR_OUT (bb));
df_simulate_initialize_backwards (bb, &live);
FOR_BB_INSNS_REVERSE (bb, insn)
rtx dst = XEXP (pat, 0);
rtx src = XEXP (pat, 1);
rtx op0 = XEXP (src, 0);
+ rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
+ ? XEXP (src, 1) : NULL);
+
if (rtx_equal_p (dst, op0)
|| GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
{
rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
rtvec vec = gen_rtvec (2, pat, clobber);
+
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ /* We can also handle a commutative operation where the
+ second operand matches the destination. */
+ else if (op1 && rtx_equal_p (dst, op1))
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec;
+
+ src = copy_rtx (src);
+ XEXP (src, 0) = op1;
+ XEXP (src, 1) = op0;
+ pat = gen_rtx_SET (VOIDmode, dst, src);
+ vec = gen_rtvec (2, pat, clobber);
PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
INSN_CODE (insn) = -1;
}
}
}
+
if (NONDEBUG_INSN_P (insn))
df_simulate_one_insn_backwards (bb, insn, &live);
}
}
+
CLEAR_REG_SET (&live);
}
{
if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
{
- output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
- output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
+ output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
}
else
{
- output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
- output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
+ output_asm_insn ("str%?\t%0, [%1], %2", otherops);
}
}
else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
return 4;
}
+/* Return nonzero if the offset in the address is an immediate. Otherwise,
+ return zero. */
+
+int
+arm_address_offset_is_imm (rtx insn)
+{
+ rtx mem, addr;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]))
+ return 0;
+
+ mem = recog_data.operand[0];
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (GET_CODE (addr) == REG
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT))
+ return 1;
+ else
+ return 0;
+}
+
/* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */
const char *
&& !crtl->tail_call_emit)
{
unsigned long mask;
- mask = (1 << (arm_size_return_regs() / 4)) - 1;
+ /* Preserve return values, of any size. */
+ mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
mask ^= 0xf;
mask &= ~saved_regs_mask;
reg = 0;
using the EABI unwinder, to prevent faulting instructions from being
swapped with a stack adjustment. */
if (crtl->profile || !TARGET_SCHED_PROLOG
- || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
+ || (arm_except_unwind_info (&global_options) == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
emit_insn (gen_blockage ());
/* If the link register is being kept alive, with the return address in it,
{
rtx addr;
bool postinc = FALSE;
+ unsigned align, modesize, align_bits;
+
gcc_assert (GET_CODE (x) == MEM);
addr = XEXP (x, 0);
if (GET_CODE (addr) == POST_INC)
postinc = 1;
addr = XEXP (addr, 0);
}
- asm_fprintf (stream, "[%r]", REGNO (addr));
+ asm_fprintf (stream, "[%r", REGNO (addr));
+
+ /* We know the alignment of this access, so we can emit a hint in the
+ instruction (for some alignments) as an aid to the memory subsystem
+ of the target. */
+ align = MEM_ALIGN (x) >> 3;
+ modesize = GET_MODE_SIZE (GET_MODE (x));
+
+ /* Only certain alignment specifiers are supported by the hardware. */
+ if (modesize == 16 && (align % 32) == 0)
+ align_bits = 256;
+ else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+ align_bits = 128;
+ else if ((align % 8) == 0)
+ align_bits = 64;
+ else
+ align_bits = 0;
+
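+ /* For example, a 16-byte access known to be 32-byte aligned is printed
+ with a ":256" hint, as in "[r0:256]"; if no suitable alignment is
+ known, no hint is emitted. */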
+ if (align_bits != 0)
+ asm_fprintf (stream, ":%d", align_bits);
+
+ asm_fprintf (stream, "]");
+
if (postinc)
fputs("!", stream);
}
return;
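+
+ /* Print a memory operand whose address is a plain register, as "[rN]".
+ This form is used by the ldrex/strex output templates below via %C. */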
+ case 'C':
+ {
+ rtx addr;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ gcc_assert (GET_CODE (addr) == REG);
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ }
+ return;
+
/* Translate an S register number into a D register number and element index. */
case 'y':
{
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
- neon_builtin_datum key, *found;
+ neon_builtin_datum key
+ = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
+ neon_builtin_datum *found;
int idx;
key.base_fcode = fcode;
return;
}
- if (ARM_EABI_UNWIND_TABLES && push)
+ if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
{
fprintf (f, "\t.save\t{");
for (regno = 0; regno < 15; regno++)
using the EABI unwinder, to prevent faulting instructions from being
swapped with a stack adjustment. */
if (crtl->profile || !TARGET_SCHED_PROLOG
- || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
+ || (arm_except_unwind_info (&global_options) == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
emit_insn (gen_blockage ());
cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
if (crtl->args.pretend_args_size)
{
/* Output unwind directive for the stack adjustment. */
- if (ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info (&global_options) == UI_TARGET)
fprintf (f, "\t.pad #%d\n",
crtl->args.pretend_args_size);
work_register = thumb_find_work_register (live_regs_mask);
- if (ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info (&global_options) == UI_TARGET)
asm_fprintf (f, "\t.pad #16\n");
asm_fprintf
return !reg_overlap_mentioned_p (value, addr);
}
+/* Return nonzero if the CONSUMER instruction (a store) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_store_addr_dep (rtx producer, rtx consumer)
+{
+ return !arm_no_early_store_addr_dep (producer, consumer);
+}
+
+/* Return nonzero if the CONSUMER instruction (a load) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_load_addr_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx addr = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = COND_EXEC_CODE (addr);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 1);
+
+ return reg_overlap_mentioned_p (value, addr);
+}
+
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
have an early register shift value or amount dependency on the
result of PRODUCER. */
return false;
}
+/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+ registers when autovectorizing for Neon, at least until multiple vector
+ widths are supported properly by the middle-end. */
+
+static enum machine_mode
+arm_preferred_simd_mode (enum machine_mode mode)
+{
+ if (TARGET_NEON)
+ switch (mode)
+ {
+ case SFmode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ case SImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ case HImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ case QImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ case DImode:
+ if (TARGET_NEON_VECTORIZE_QUAD)
+ return V2DImode;
+ break;
+
+ default:;
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ switch (mode)
+ {
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+
+ default:;
+ }
+
+ return word_mode;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
+
+ We need to define this for LO_REGS on thumb. Otherwise we can end up
+ using r0-r4 for function arguments, r7 for the stack frame and don't
+ have enough left over to do doubleword arithmetic. */
+
+static bool
+arm_class_likely_spilled_p (reg_class_t rclass)
+{
+ if ((TARGET_THUMB && rclass == LO_REGS)
+ || rclass == CC_REG)
+ return true;
+
+ return false;
+}
+
/* Implements target hook small_register_classes_for_mode_p. */
bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
return p;
}
-#ifdef TARGET_UNWIND_INFO
+#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
push during alignment.
These should only ever be generated by the function prologue code, so
{
rtx pat;
- if (!ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info (&global_options) != UI_TARGET)
return;
if (!(flag_unwind_tables || crtl->uses_eh_lsda)
return TRUE;
}
-#endif /* TARGET_UNWIND_INFO */
+
+/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
+
+static void
+arm_asm_emit_except_personality (rtx personality)
+{
+ fputs ("\t.personality\t", asm_out_file);
+ output_addr_const (asm_out_file, personality);
+ fputc ('\n', asm_out_file);
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+arm_asm_init_sections (void)
+{
+ exception_section = get_unnamed_section (0, output_section_asm_op,
+ "\t.handlerdata");
+}
+#endif /* ARM_UNWIND_INFO */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO. */
+
+static enum unwind_info_type
+arm_except_unwind_info (struct gcc_options *opts)
+{
+ /* Honor the --enable-sjlj-exceptions configure switch. */
+#ifdef CONFIG_SJLJ_EXCEPTIONS
+ if (CONFIG_SJLJ_EXCEPTIONS)
+ return UI_SJLJ;
+#endif
+
+ /* If not using ARM EABI unwind tables... */
+ if (ARM_UNWIND_INFO)
+ {
+ /* For simplicity elsewhere in this file, indicate that all unwind
+ info is disabled if we're not emitting unwind tables. */
+ if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
+ return UI_NONE;
+ else
+ return UI_TARGET;
+ }
+
+ /* ... we use sjlj exceptions for backwards compatibility. */
+ return UI_SJLJ;
+}
/* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
void
arm_output_fn_unwind (FILE * f, bool prologue)
{
- if (!ARM_EABI_UNWIND_TABLES)
+ if (arm_except_unwind_info (&global_options) != UI_TARGET)
return;
if (prologue)
fputs ("(tlsldo)", file);
}
-bool
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
{
case cortexr4:
case cortexr4f:
+ case cortexa5:
case cortexa8:
case cortexa9:
return 2;
sizeof (thumb_core_reg_alloc_order));
}
-/* Set default optimization options. */
-void
-arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
-{
- /* Enable section anchors by default at -O1 or higher.
- Use 2 to distinguish from an explicit -fsection-anchors
- given on the command line. */
- if (level > 0)
- flag_section_anchors = 2;
-}
-
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
bool
return !TARGET_THUMB1;
}
+/* Legitimize a memory reference for a sync primitive implemented using
+ ldrex / strex. We currently force the form of the reference to be
+ indirect without offset. We do not yet support the indirect offset
+ addressing supported by some ARM targets for these
+ instructions. */
+static rtx
+arm_legitimize_sync_memory (rtx memory)
+{
+ rtx addr = force_reg (Pmode, XEXP (memory, 0));
+ rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
+
+ set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
+ return legitimate_memory;
+}
+
+/* An instruction emitter. */
+typedef void (* emit_f) (int label, const char *, rtx *);
+
+/* An instruction emitter that emits via the conventional
+ output_asm_insn. */
+static void
+arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
+{
+ output_asm_insn (pattern, operands);
+}
+
+/* Count the number of emitted synchronization instructions. */
+static unsigned arm_insn_count;
+
+/* An emitter that counts emitted instructions but does not actually
+ emit instructions into the instruction stream. */
+static void
+arm_count (int label,
+ const char *pattern ATTRIBUTE_UNUSED,
+ rtx *operands ATTRIBUTE_UNUSED)
+{
+ if (! label)
+ ++ arm_insn_count;
+}
+
+/* Construct a pattern using conventional output formatting and feed
+ it to output_asm_insn. Provides a mechanism to construct the
+ output pattern on the fly. Note the hard limit on the pattern
+ buffer size. */
+static void ATTRIBUTE_PRINTF_4
+arm_output_asm_insn (emit_f emit, int label, rtx *operands,
+ const char *pattern, ...)
+{
+ va_list ap;
+ char buffer[256];
+
+ va_start (ap, pattern);
+ vsprintf (buffer, pattern, ap);
+ va_end (ap);
+ emit (label, buffer, operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target to a specified emitter. */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+ if (TARGET_HAVE_DMB)
+ {
+ /* Note we issue a system level barrier. We should consider
+ issuing an inner shareability zone barrier here instead, i.e.
+ "DMB ISH". */
+ emit (0, "dmb\tsy", operands);
+ return;
+ }
+
+ if (TARGET_HAVE_DMB_MCR)
+ {
+ emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+ return;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target. */
+const char *
+arm_output_memory_barrier (rtx *operands)
+{
+ arm_process_output_memory_barrier (arm_emit, operands);
+ return "";
+}
+
+/* Helper to figure out the instruction suffix required on ldrex/strex
+ for operations on an object of the specified mode. */
+static const char *
+arm_ldrex_suffix (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode: return "b";
+ case HImode: return "h";
+ case SImode: return "";
+ case DImode: return "d";
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
+
+/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_ldrex (emit_f emit,
+ enum machine_mode mode,
+ rtx target,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[2];
+
+ operands[0] = target;
+ operands[1] = memory;
+ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+}
+
+/* Emit a strex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_strex (emit_f emit,
+ enum machine_mode mode,
+ const char *cc,
+ rtx result,
+ rtx value,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[3];
+
+ operands[0] = result;
+ operands[1] = value;
+ operands[2] = memory;
+ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
+ cc);
+}
+
+/* Helper to emit a two operand instruction. */
+static void
+arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
+{
+ rtx operands[2];
+
+ operands[0] = d;
+ operands[1] = s;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
+}
+
+/* Helper to emit a three operand instruction. */
+static void
+arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
+{
+ rtx operands[3];
+
+ operands[0] = d;
+ operands[1] = a;
+ operands[2] = b;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
+}
+
+/* Emit a load store exclusive synchronization loop.
+
+ do
+ old_value = [mem]
+ if old_value != required_value
+ break;
+ t1 = sync_op (old_value, new_value)
+ [mem] = t1, t2 = [0|1]
+ while ! t2
+
+ Note:
+ t1 == t2 is not permitted
+ t1 == old_value is permitted
+
+ required_value:
+
+ RTX register or const_int representing the required old_value for
+ the modify to continue; if NULL, no comparison is performed. */
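+
+/* For example, with a leading barrier and no required_value, a SImode
+ operation expands to roughly the following sequence (label names are
+ illustrative):
+
+ dmb sy
+ 1: ldrex old, [mem]
+ <op> t1, old, new_value
+ strex t2, t1, [mem]
+ teq t2, #0
+ bne 1b
+ dmb sy */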
+static void
+arm_output_sync_loop (emit_f emit,
+ enum machine_mode mode,
+ rtx old_value,
+ rtx memory,
+ rtx required_value,
+ rtx new_value,
+ rtx t1,
+ rtx t2,
+ enum attr_sync_op sync_op,
+ int early_barrier_required)
+{
+ rtx operands[1];
+
+ gcc_assert (t1 != t2);
+
+ if (early_barrier_required)
+ arm_process_output_memory_barrier (emit, NULL);
+
+ arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
+
+ arm_output_ldrex (emit, mode, old_value, memory);
+
+ if (required_value)
+ {
+ rtx operands[2];
+
+ operands[0] = old_value;
+ operands[1] = required_value;
+ arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
+ }
+
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "add", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "sub", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_IOR:
+ arm_output_op3 (emit, "orr", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_AND:
+ arm_output_op3 (emit, "and", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_NAND:
+ arm_output_op3 (emit, "and", t1, old_value, new_value);
+ arm_output_op2 (emit, "mvn", t1, t1);
+ break;
+
+ case SYNC_OP_NONE:
+ t1 = new_value;
+ break;
+ }
+
+ arm_output_strex (emit, mode, "", t2, t1, memory);
+ operands[0] = t2;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+
+ arm_process_output_memory_barrier (emit, NULL);
+ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
+}
+
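+/* Return the operand recorded by a sync_* attribute: attribute values are
+ biased by one, so an INDEX greater than zero selects operands[INDEX - 1],
+ while an INDEX of zero means no operand was recorded and DEFAULT_VALUE
+ is returned. */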
+static rtx
+arm_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
+ if (index > 0)
+ default_value = operands[index - 1];
+
+ return default_value;
+}
+
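+/* Fetch the operand recorded by INSN's sync_NAME attribute, falling back
+ to DEFAULT when the attribute records no operand. */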
+#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
+ arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
+
+/* Extract the operands for a synchronization instruction from the
+ instruction's attributes and emit the instruction. */
+static void
+arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
+{
+ rtx result, memory, required_value, new_value, t1, t2;
+ int early_barrier;
+ enum machine_mode mode;
+ enum attr_sync_op sync_op;
+
+ result = FETCH_SYNC_OPERAND(result, 0);
+ memory = FETCH_SYNC_OPERAND(memory, 0);
+ required_value = FETCH_SYNC_OPERAND(required_value, 0);
+ new_value = FETCH_SYNC_OPERAND(new_value, 0);
+ t1 = FETCH_SYNC_OPERAND(t1, 0);
+ t2 = FETCH_SYNC_OPERAND(t2, 0);
+ early_barrier =
+ get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
+ sync_op = get_attr_sync_op (insn);
+ mode = GET_MODE (memory);
+
+ arm_output_sync_loop (emit, mode, result, memory, required_value,
+ new_value, t1, t2, sync_op, early_barrier);
+}
+
+/* Emit a synchronization instruction loop. */
+const char *
+arm_output_sync_insn (rtx insn, rtx *operands)
+{
+ arm_process_output_sync_insn (arm_emit, insn, operands);
+ return "";
+}
+
+/* Count the number of machine instructions that will be emitted for a
+ synchronization instruction. Note that the emitter used does not
+ emit instructions; it just counts them, being careful not to count
+ labels. */
+unsigned int
+arm_sync_loop_insns (rtx insn, rtx *operands)
+{
+ arm_insn_count = 0;
+ arm_process_output_sync_insn (arm_count, insn, operands);
+ return arm_insn_count;
+}
+
+/* Helper to call a target sync instruction generator, dealing with
+ the variation in operands required by the different generators. */
+static rtx
+arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
+ rtx memory, rtx required_value, rtx new_value)
+{
+ switch (generator->op)
+ {
+ case arm_sync_generator_omn:
+ gcc_assert (! required_value);
+ return generator->u.omn (old_value, memory, new_value);
+
+ case arm_sync_generator_omrn:
+ gcc_assert (required_value);
+ return generator->u.omrn (old_value, memory, required_value, new_value);
+ }
+
+ return NULL;
+}
+
+/* Expand a synchronization loop. The synchronization loop is expanded
+ as an opaque block of instructions in order to ensure that we do
+ not subsequently get extraneous memory accesses inserted within the
+ critical region. The exclusive access property of ldrex/strex is
+ only guaranteed if there are no intervening memory accesses. */
+void
+arm_expand_sync (enum machine_mode mode,
+ struct arm_sync_generator *generator,
+ rtx target, rtx memory, rtx required_value, rtx new_value)
+{
+ if (target == NULL)
+ target = gen_reg_rtx (mode);
+
+ memory = arm_legitimize_sync_memory (memory);
+ if (mode != SImode)
+ {
+ rtx load_temp = gen_reg_rtx (SImode);
+
+ if (required_value)
+ required_value = convert_modes (SImode, mode, required_value, true);
+
+ new_value = convert_modes (SImode, mode, new_value, true);
+ emit_insn (arm_call_generator (generator, load_temp, memory,
+ required_value, new_value));
+ emit_move_insn (target, gen_lowpart (mode, load_temp));
+ }
+ else
+ {
+ emit_insn (arm_call_generator (generator, target, memory, required_value,
+ new_value));
+ }
+}
+
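+/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */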
+static bool
+arm_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+ /* Vectors which aren't in packed structures will not be less aligned than
+ the natural alignment of their element type, so this is safe. */
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ return !is_packed;
+
+ return default_builtin_vector_alignment_reachable (type, is_packed);
+}
+
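+/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT. */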
+static bool
+arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type, int misalignment,
+ bool is_packed)
+{
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
+
+ if (is_packed)
+ return align == 1;
+
+ /* If the misalignment is unknown, we should be able to handle the access
+ so long as it is not to a member of a packed data structure. */
+ if (misalignment == -1)
+ return true;
+
+ /* Return true if the misalignment is a multiple of the natural alignment
+ of the vector's element type. This is probably always going to be
+ true in practice, since we've already established that this isn't a
+ packed access. */
+ return ((misalignment % align) == 0);
+ }
+
+ return default_builtin_support_vector_misalignment (mode, type, misalignment,
+ is_packed);
+}
+
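+/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */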
+static void
+arm_conditional_register_usage (void)
+{
+ int regno;
+
+ if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
+ {
+ for (regno = FIRST_FPA_REGNUM;
+ regno <= LAST_FPA_REGNUM; ++regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+
+ if (TARGET_THUMB1 && optimize_size)
+ {
+ /* When optimizing for size on Thumb-1, it's better not
+ to use the HI regs, because of the overhead of
+ stacking them. */
+ for (regno = FIRST_HI_REGNUM;
+ regno <= LAST_HI_REGNUM; ++regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+
+ /* The link register can be clobbered by any branch insn,
+ but we have no way to track that at present, so mark
+ it as unavailable. */
+ if (TARGET_THUMB1)
+ fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
+
+ if (TARGET_32BIT && TARGET_HARD_FLOAT)
+ {
+ if (TARGET_MAVERICK)
+ {
+ for (regno = FIRST_FPA_REGNUM;
+ regno <= LAST_FPA_REGNUM; ++ regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ for (regno = FIRST_CIRRUS_FP_REGNUM;
+ regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
+ }
+ }
+ if (TARGET_VFP)
+ {
+ /* VFPv3 registers are disabled when earlier VFP
+ versions are selected due to the definition of
+ LAST_VFP_REGNUM. */
+ for (regno = FIRST_VFP_REGNUM;
+ regno <= LAST_VFP_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
+ || regno >= FIRST_VFP_REGNUM + 32;
+ }
+ }
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ {
+ regno = FIRST_IWMMXT_GR_REGNUM;
+ /* The 2002/10/09 revision of the XScale ABI has wCG0
+ and wCG1 as call-preserved registers. The 2002/11/21
+ revision changed this so that all wCG registers are
+ scratch registers. */
+ for (regno = FIRST_IWMMXT_GR_REGNUM;
+ regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
+ fixed_regs[regno] = 0;
+ /* The XScale ABI has wR0 - wR9 as scratch registers,
+ the rest as call-preserved registers. */
+ for (regno = FIRST_IWMMXT_REGNUM;
+ regno <= LAST_IWMMXT_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
+ }
+ }
+
+ if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+ else if (TARGET_APCS_STACK)
+ {
+ fixed_regs[10] = 1;
+ call_used_regs[10] = 1;
+ }
+ /* -mcaller-super-interworking reserves r11 for calls to
+ _interwork_r11_call_via_rN(). Making the register global
+ is an easy way of ensuring that it remains valid for all
+ calls. */
+ if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
+ || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
+ {
+ fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ if (TARGET_CALLER_INTERWORKING)
+ global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ }
+ SUBTARGET_CONDITIONAL_REGISTER_USAGE
+}
+
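+/* Implement TARGET_PREFERRED_RENAME_CLASS. */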
+static reg_class_t
+arm_preferred_rename_class (reg_class_t class)
+{
+ /* Thumb-2 instructions using LO_REGS may be smaller than instructions
+ using GENERAL_REGS. During the register rename pass we therefore prefer
+ LO_REGS, which can reduce code size. */
+ if (TARGET_THUMB2 && class == GENERAL_REGS)
+ return LO_REGS;
+ else
+ return NO_REGS;
+}
+
#include "gt-arm.h"