/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
#include "params.h"
#include "opts.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

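/* Checking the arithmetic in the comment above: an anchor covers offsets
   -4088 through +4095 plus the anchored byte itself, i.e.
   4088 + 4095 + 1 = 8184 = 8 * 1023 bytes, which is indeed divisible by
   eight.  */
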
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse the -mstructure-size-boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)

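/* To see how these compose, expanding FL_FOR_ARCH7A by hand gives:

     FL_FOR_ARCH7A = FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K
                   = (FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7 | FL_NOTM | FL_ARCH6K
                   = FL_FOR_ARCH6T2 | FL_ARCH7 | FL_ARCH6K

   i.e. the A profile is v6T2 plus the v7 and v6K extensions, with the
   non-M-profile instructions (stripped out for v7-M) restored.  */
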
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

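/* These macros splice the last three fields into the tune_params
   initializers below.  For example, ARM_PREFETCH_BENEFICIAL(4,32,32)
   expands to "4, 32, 32": 4 prefetch slots, an L1 cache size of 32 and an
   L1 line size of 32, in the units (kilobytes and bytes respectively)
   that the l1-cache-size and l1-cache-line-size --param machinery
   expects.  ARM_PREFETCH_NOT_BENEFICIAL supplies the "unknown" values
   0, -1, -1.  */
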
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                                    /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                    /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                    /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                    /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

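/* Each ARM_CORE line in arm-cores.def expands to one entry of the table
   above.  For instance, a line of the form

     ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   becomes

     {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      &arm_9e_tune},

   so a core names only its architecture plus whatever extra flags and
   tuning it needs on top of it.  */
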
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",           ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",           ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",            ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, false},
  {"neon-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true},
  {"vfpv4",          ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};

struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

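/* For instance, emit_set_insn (stack_pointer_rtx, hard_frame_pointer_rtx)
   adds the single insn (set (reg sp) (reg fp)) to the instruction stream
   and returns it.  */
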
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

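/* Worked example: bit_count (0x29) == 3.  0x29 is 101001 in binary, and
   each "value &= value - 1" step above clears exactly one set bit:
   101001 -> 101000 -> 100000 -> 0.  */
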
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

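  /* So a SImode "a % b", for example, expands to a call to
     __aeabi_idivmod: the quotient comes back in r0 and the remainder in
     r1, and the compiler simply uses the r1 half of the result, exactly
     as described in the comment above.  */
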
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

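/* The ABI visibility of the tag name mentioned above is real: on an
   AAPCS target a C++ function such as "void f (va_list);" mangles as
   "_Z1fSt9__va_list", because the ARM C++ ABI treats __va_list as if it
   were declared in namespace std (see arm_mangle_type).  */
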
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so look up tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

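  /* Checking the arithmetic in the comment above:
     248 + 1 + 4095 = 4344 = 8 * 543, so Thumb-2 anchor blocks are
     likewise a multiple of eight bytes in size.  */
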
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

1709 if (TARGET_AAPCS_BASED)
1711 if (arm_abi == ARM_ABI_IWMMXT)
1712 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1713 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1714 && TARGET_HARD_FLOAT
1716 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1718 arm_pcs_default = ARM_PCS_AAPCS;
1722 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1723 sorry ("-mfloat-abi=hard and VFP");
1725 if (arm_abi == ARM_ABI_APCS)
1726 arm_pcs_default = ARM_PCS_APCS;
1728 arm_pcs_default = ARM_PCS_ATPCS;
1731 /* For arm2/3 there is no need to do any scheduling if there is only
1732 a floating point emulator, or we are doing software floating-point. */
1733 if ((TARGET_SOFT_FLOAT
1734 || (TARGET_FPA && arm_fpu_desc->rev))
1735 && (tune_flags & FL_MODE32) == 0)
1736 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1738 if (target_thread_switch)
1740 if (strcmp (target_thread_switch, "soft") == 0)
1741 target_thread_pointer = TP_SOFT;
1742 else if (strcmp (target_thread_switch, "auto") == 0)
1743 target_thread_pointer = TP_AUTO;
1744 else if (strcmp (target_thread_switch, "cp15") == 0)
1745 target_thread_pointer = TP_CP15;
1747 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1750 /* Use the cp15 method if it is available. */
1751 if (target_thread_pointer == TP_AUTO)
1753 if (arm_arch6k && !TARGET_THUMB1)
1754 target_thread_pointer = TP_CP15;
1756 target_thread_pointer = TP_SOFT;
1759 if (TARGET_HARD_TP && TARGET_THUMB1)
1760 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1762 /* Override the default structure alignment for AAPCS ABI. */
1763 if (TARGET_AAPCS_BASED)
1764 arm_structure_size_boundary = 8;
1766 if (structure_size_string != NULL)
1768 int size = strtol (structure_size_string, NULL, 0);
1770 if (size == 8 || size == 32
1771 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1772 arm_structure_size_boundary = size;
1774 warning (0, "structure size boundary can only be set to %s",
1775 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1778 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1780 error ("RTP PIC is incompatible with Thumb");
1784 /* If stack checking is disabled, we can use r10 as the PIC register,
1785 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1786 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1788 if (TARGET_VXWORKS_RTP)
1789 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1790 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1793 if (flag_pic && TARGET_VXWORKS_RTP)
1794 arm_pic_register = 9;
1796 if (arm_pic_register_string != NULL)
1798 int pic_register = decode_reg_name (arm_pic_register_string);
1801 warning (0, "-mpic-register= is useless without -fpic");
1803 /* Prevent the user from choosing an obviously stupid PIC register. */
1804 else if (pic_register < 0 || call_used_regs[pic_register]
1805 || pic_register == HARD_FRAME_POINTER_REGNUM
1806 || pic_register == STACK_POINTER_REGNUM
1807 || pic_register >= PC_REGNUM
1808 || (TARGET_VXWORKS_RTP
1809 && (unsigned int) pic_register != arm_pic_register))
1810 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1812 arm_pic_register = pic_register;
1815 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1816 if (fix_cm3_ldrd == 2)
1818 if (arm_selected_cpu->core == cortexm3)
1824 if (TARGET_THUMB1 && flag_schedule_insns)
1826 /* Don't warn since it's on by default in -O2. */
1827 flag_schedule_insns = 0;
1832 /* If optimizing for size, bump the number of instructions that we
1833 are prepared to conditionally execute (even on a StrongARM). */
1834 max_insns_skipped = 6;
1838 /* StrongARM has early execution of branches, so a sequence
1839 that is worth skipping is shorter. */
1840 if (arm_tune_strongarm)
1841 max_insns_skipped = 3;
1844 /* Hot/Cold partitioning is not currently supported, since we can't
1845 handle literal pool placement in that case. */
1846 if (flag_reorder_blocks_and_partition)
1848 inform (input_location,
1849 "-freorder-blocks-and-partition not supported on this architecture");
1850 flag_reorder_blocks_and_partition = 0;
1851 flag_reorder_blocks = 1;
1855 /* Hoisting PIC address calculations more aggressively provides a small,
1856 but measurable, size reduction for PIC code. Therefore, we decrease
1857 the bar for unrestricted expression hoisting to the cost of PIC address
1858 calculation, which is 2 instructions. */
1859 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1860 global_options.x_param_values,
1861 global_options_set.x_param_values);
1863 /* ARM EABI defaults to strict volatile bitfields. */
1864 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1865 flag_strict_volatile_bitfields = 1;
1867 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we
1868 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1869 if (flag_prefetch_loop_arrays < 0
1872 && current_tune->num_prefetch_slots > 0)
1873 flag_prefetch_loop_arrays = 1;
1875 /* Set up parameters to be used in the prefetching algorithm. Do not override the
1876 defaults unless we are tuning for a core we have researched values for. */
1877 if (current_tune->num_prefetch_slots > 0)
1878 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1879 current_tune->num_prefetch_slots,
1880 global_options.x_param_values,
1881 global_options_set.x_param_values);
1882 if (current_tune->l1_cache_line_size >= 0)
1883 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1884 current_tune->l1_cache_line_size,
1885 global_options.x_param_values,
1886 global_options_set.x_param_values);
1887 if (current_tune->l1_cache_size >= 0)
1888 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1889 current_tune->l1_cache_size,
1890 global_options.x_param_values,
1891 global_options_set.x_param_values);
1893 /* Register global variables with the garbage collector. */
1894 arm_add_gc_roots ();
1898 arm_add_gc_roots (void)
1900 gcc_obstack_init(&minipool_obstack);
1901 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1904 /* A table of known ARM exception types.
1905 For use with the interrupt function attribute. */
1909 const char *const arg;
1910 const unsigned long return_value;
1914 static const isr_attribute_arg isr_attribute_args [] =
1916 { "IRQ", ARM_FT_ISR },
1917 { "irq", ARM_FT_ISR },
1918 { "FIQ", ARM_FT_FIQ },
1919 { "fiq", ARM_FT_FIQ },
1920 { "ABORT", ARM_FT_ISR },
1921 { "abort", ARM_FT_ISR },
1924 { "UNDEF", ARM_FT_EXCEPTION },
1925 { "undef", ARM_FT_EXCEPTION },
1926 { "SWI", ARM_FT_EXCEPTION },
1927 { "swi", ARM_FT_EXCEPTION },
1928 { NULL, ARM_FT_NORMAL }
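/* Illustrative use of the strings above from user code (the handler
name here is hypothetical):

void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

The attribute argument is matched case-sensitively against this table,
which is why both spellings of each name are listed. */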
1931 /* Returns the (interrupt) function type of the current
1932 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1934 static unsigned long
1935 arm_isr_value (tree argument)
1937 const isr_attribute_arg * ptr;
1941 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1943 /* No argument - default to IRQ. */
1944 if (argument == NULL_TREE)
1947 /* Get the value of the argument. */
1948 if (TREE_VALUE (argument) == NULL_TREE
1949 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1950 return ARM_FT_UNKNOWN;
1952 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1954 /* Check it against the list of known arguments. */
1955 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1956 if (streq (arg, ptr->arg))
1957 return ptr->return_value;
1959 /* An unrecognized interrupt type. */
1960 return ARM_FT_UNKNOWN;
1963 /* Computes the type of the current function. */
1965 static unsigned long
1966 arm_compute_func_type (void)
1968 unsigned long type = ARM_FT_UNKNOWN;
1972 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1974 /* Decide if the current function is volatile. Such functions
1975 never return, and many memory cycles can be saved by not storing
1976 register values that will never be needed again. This optimization
1977 was added to speed up context switching in a kernel application. */
1979 && (TREE_NOTHROW (current_function_decl)
1980 || !(flag_unwind_tables
1982 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1983 && TREE_THIS_VOLATILE (current_function_decl))
1984 type |= ARM_FT_VOLATILE;
1986 if (cfun->static_chain_decl != NULL)
1987 type |= ARM_FT_NESTED;
1989 attr = DECL_ATTRIBUTES (current_function_decl);
1991 a = lookup_attribute ("naked", attr);
1993 type |= ARM_FT_NAKED;
1995 a = lookup_attribute ("isr", attr);
1997 a = lookup_attribute ("interrupt", attr);
2000 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2002 type |= arm_isr_value (TREE_VALUE (a));
2007 /* Returns the type of the current function. */
2010 arm_current_func_type (void)
2012 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2013 cfun->machine->func_type = arm_compute_func_type ();
2015 return cfun->machine->func_type;
2019 arm_allocate_stack_slots_for_args (void)
2021 /* Naked functions should not allocate stack slots for arguments. */
2022 return !IS_NAKED (arm_current_func_type ());
2026 /* Output assembler code for a block containing the constant parts
2027 of a trampoline, leaving space for the variable parts.
2029 On the ARM (if r8 is the static chain regnum, and remembering that
2030 referencing pc adds an offset of 8) the trampoline looks like:
2033 .word static chain value
2034 .word function's address
2035 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2038 arm_asm_trampoline_template (FILE *f)
2042 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2043 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2045 else if (TARGET_THUMB2)
2047 /* The Thumb-2 trampoline is similar to the ARM implementation.
2048 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2049 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2050 STATIC_CHAIN_REGNUM, PC_REGNUM);
2051 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2055 ASM_OUTPUT_ALIGN (f, 2);
2056 fprintf (f, "\t.code\t16\n");
2057 fprintf (f, ".Ltrampoline_start:\n");
2058 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2059 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2060 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2061 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2062 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2063 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2065 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2066 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
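/* Illustrative sketch of the assembled ARM template above, assuming r8
is the static chain register. Reading pc yields the address of the
current insn plus 8, so each ldr's #0 offset reaches the word two
insns further on:

ldr r8, [pc, #0] @ loads the word at .+8 (static chain)
ldr pc, [pc, #0] @ loads the word at .+12 (target address)
.word 0 @ patched at run time with the chain value
.word 0 @ patched at run time with the function address */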
2069 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2072 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2074 rtx fnaddr, mem, a_tramp;
2076 emit_block_move (m_tramp, assemble_trampoline_template (),
2077 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2079 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2080 emit_move_insn (mem, chain_value);
2082 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2083 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2084 emit_move_insn (mem, fnaddr);
2086 a_tramp = XEXP (m_tramp, 0);
2087 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2088 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2089 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2092 /* Thumb trampolines should be entered in thumb mode, so set
2093 the bottom bit of the address. */
2096 arm_trampoline_adjust_address (rtx addr)
2099 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2100 NULL, 0, OPTAB_LIB_WIDEN);
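/* Illustration: interworking branches (bx, blx) use bit 0 of the target
address as the instruction-set state select, so ORing in 1 here makes
a later indirect call enter the trampoline in Thumb state. */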
2104 /* Return 1 if it is possible to return using a single instruction.
2105 If SIBLING is non-null, this is a test for a return before a sibling
2106 call. SIBLING is the call insn, so we can examine its register usage. */
2109 use_return_insn (int iscond, rtx sibling)
2112 unsigned int func_type;
2113 unsigned long saved_int_regs;
2114 unsigned HOST_WIDE_INT stack_adjust;
2115 arm_stack_offsets *offsets;
2117 /* Never use a return instruction before reload has run. */
2118 if (!reload_completed)
2121 func_type = arm_current_func_type ();
2123 /* Naked, volatile and stack alignment functions need special
2124 consideration. */
2125 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2128 /* So do interrupt functions that use the frame pointer and Thumb
2129 interrupt functions. */
2130 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2133 offsets = arm_get_frame_offsets ();
2134 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2136 /* As do variadic functions. */
2137 if (crtl->args.pretend_args_size
2138 || cfun->machine->uses_anonymous_args
2139 /* Or if the function calls __builtin_eh_return () */
2140 || crtl->calls_eh_return
2141 /* Or if the function calls alloca */
2142 || cfun->calls_alloca
2143 /* Or if there is a stack adjustment. However, if the stack pointer
2144 is saved on the stack, we can use a pre-incrementing stack load. */
2145 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2146 && stack_adjust == 4)))
2149 saved_int_regs = offsets->saved_regs_mask;
2151 /* Unfortunately, the insn
2153 ldmib sp, {..., sp, ...}
2155 triggers a bug on most SA-110 based devices, such that the stack
2156 pointer won't be correctly restored if the instruction takes a
2157 page fault. We work around this problem by popping r3 along with
2158 the other registers, since that is never slower than executing
2159 another instruction.
2161 We test for !arm_arch5 here, because code for any architecture
2162 less than this could potentially be run on one of the buggy
2163 chips. */
2164 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2166 /* Validate that r3 is a call-clobbered register (always true in
2167 the default abi) ... */
2168 if (!call_used_regs[3])
2171 /* ... that it isn't being used for a return value ... */
2172 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2175 /* ... or for a tail-call argument ... */
2178 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2180 if (find_regno_fusage (sibling, USE, 3))
2184 /* ... and that there are no call-saved registers in r0-r2
2185 (always true in the default ABI). */
2186 if (saved_int_regs & 0x7)
2190 /* Can't be done if interworking with Thumb, and any registers have been
2191 stacked. */
2192 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2195 /* On StrongARM, conditional returns are expensive if they aren't
2196 taken and multiple registers have been stacked. */
2197 if (iscond && arm_tune_strongarm)
2199 /* Conditional return when just the LR is stored is a simple
2200 conditional-load instruction, that's not expensive. */
2201 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2205 && arm_pic_register != INVALID_REGNUM
2206 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2210 /* If there are saved registers but the LR isn't saved, then we need
2211 two instructions for the return. */
2212 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2215 /* Can't be done if any of the FPA regs are pushed,
2216 since this also requires an insn. */
2217 if (TARGET_HARD_FLOAT && TARGET_FPA)
2218 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2219 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2222 /* Likewise VFP regs. */
2223 if (TARGET_HARD_FLOAT && TARGET_VFP)
2224 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2225 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2228 if (TARGET_REALLY_IWMMXT)
2229 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2230 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
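/* Illustration (not from the original source): the single-instruction
returns this predicate guards are forms such as "bx lr" or
"mov pc, lr" when nothing was stacked, or one "ldmfd sp!, {..., pc}"
that restores the saved registers and the return address together;
each test above rejects a case where such a form would be wrong or
would need a second insn. */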
2236 /* Return TRUE if int I is a valid immediate ARM constant. */
2239 const_ok_for_arm (HOST_WIDE_INT i)
2243 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2244 be all zero, or all one. */
2245 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2246 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2247 != ((~(unsigned HOST_WIDE_INT) 0)
2248 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2251 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2253 /* Fast return for 0 and small values. We must do this for zero, since
2254 the code below can't handle that one case. */
2255 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2258 /* Get the number of trailing zeros. */
2259 lowbit = ffs((int) i) - 1;
2261 /* Only even shifts are allowed in ARM mode so round down to the
2262 nearest even number. */
2266 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2271 /* Allow rotated constants in ARM mode. */
2273 && ((i & ~0xc000003f) == 0
2274 || (i & ~0xf000000f) == 0
2275 || (i & ~0xfc000003) == 0))
2282 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2285 if (i == v || i == (v | (v << 8)))
2288 /* Allow repeated pattern 0xXY00XY00. */
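/* Worked examples for the checks above (illustrative): 0x0000ff00 is
valid, being 0xff at an even position; 0xf000000f is valid, being
0xff rotated right by 4 (one of the wrap-around masks tested for ARM
mode); 0x00010001 is not a valid ARM-mode immediate, since its set
bits span 17 bits, but it does match the Thumb-2 replicated pattern
0x00XY00XY with XY == 01. */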
2298 /* Return true if I is a valid constant for the operation CODE. */
2300 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2302 if (const_ok_for_arm (i))
2326 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2328 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2334 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2338 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2345 /* Emit a sequence of insns to handle a large constant.
2346 CODE is the code of the operation required, it can be any of SET, PLUS,
2347 IOR, AND, XOR, MINUS;
2348 MODE is the mode in which the operation is being performed;
2349 VAL is the integer to operate on;
2350 SOURCE is the other operand (a register, or a null-pointer for SET);
2351 SUBTARGETS means it is safe to create scratch registers if that will
2352 either produce a simpler sequence, or we will want to cse the values.
2353 Return value is the number of insns emitted. */
2355 /* ??? Tweak this for thumb2. */
2357 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2358 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2362 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2363 cond = COND_EXEC_TEST (PATTERN (insn));
2367 if (subtargets || code == SET
2368 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2369 && REGNO (target) != REGNO (source)))
2371 /* After arm_reorg has been called, we can't fix up expensive
2372 constants by pushing them into memory so we must synthesize
2373 them in-line, regardless of the cost. This is only likely to
2374 be more costly on chips that have load delay slots and we are
2375 compiling without running the scheduler (so no splitting
2376 occurred before the final instruction emission).
2378 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2380 if (!after_arm_reorg
2382 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2384 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2389 /* Currently SET is the only monadic value for CODE, all
2390 the rest are dyadic. */
2391 if (TARGET_USE_MOVT)
2392 arm_emit_movpair (target, GEN_INT (val));
2394 emit_set_insn (target, GEN_INT (val));
2400 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2402 if (TARGET_USE_MOVT)
2403 arm_emit_movpair (temp, GEN_INT (val));
2405 emit_set_insn (temp, GEN_INT (val));
2407 /* For MINUS, the constant is the value subtracted from, since we never
2408 have subtraction of a constant. */
2410 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2412 emit_set_insn (target,
2413 gen_rtx_fmt_ee (code, mode, source, temp));
2419 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2423 /* Return the number of instructions required to synthesize the given
2424 constant, if we start emitting them from bit-position I. */
2426 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2428 HOST_WIDE_INT temp1;
2429 int step_size = TARGET_ARM ? 2 : 1;
2432 gcc_assert (TARGET_ARM || i == 0);
2440 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2445 temp1 = remainder & ((0x0ff << end)
2446 | ((i < end) ? (0xff >> (32 - end)) : 0));
2447 remainder &= ~temp1;
2452 } while (remainder);
2457 find_best_start (unsigned HOST_WIDE_INT remainder)
2459 int best_consecutive_zeros = 0;
2463 /* If we aren't targeting ARM, the best place to start is always at
2464 zero. */
2468 for (i = 0; i < 32; i += 2)
2470 int consecutive_zeros = 0;
2472 if (!(remainder & (3 << i)))
2474 while ((i < 32) && !(remainder & (3 << i)))
2476 consecutive_zeros += 2;
2479 if (consecutive_zeros > best_consecutive_zeros)
2481 best_consecutive_zeros = consecutive_zeros;
2482 best_start = i - consecutive_zeros;
2488 /* So long as it won't require any more insns to do so, it's
2489 desirable to emit a small constant (in bits 0...9) in the last
2490 insn. This way there is more chance that it can be combined with
2491 a later addressing insn to form a pre-indexed load or store
2492 operation. Consider:
2494 *((volatile int *)0xe0000100) = 1;
2495 *((volatile int *)0xe0000110) = 2;
2497 We want this to wind up as:
2501 str rB, [rA, #0x100]
2503 str rB, [rA, #0x110]
2505 rather than having to synthesize both large constants from scratch.
2507 Therefore, we calculate how many insns would be required to emit
2508 the constant starting from `best_start', and also starting from
2509 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2510 yield a shorter sequence, we may as well use zero. */
2512 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2513 && (count_insns_for_constant (remainder, 0) <=
2514 count_insns_for_constant (remainder, best_start)))
2520 /* Emit an instruction with the indicated PATTERN. If COND is
2521 non-NULL, conditionalize the execution of the instruction on COND
2522 being true. */
2525 emit_constant_insn (rtx cond, rtx pattern)
2528 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2529 emit_insn (pattern);
2532 /* As above, but extra parameter GENERATE which, if clear, suppresses
2533 RTL generation. */
2534 /* ??? This needs more work for thumb2. */
2537 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2538 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2543 int final_invert = 0;
2544 int can_negate_initial = 0;
2546 int num_bits_set = 0;
2547 int set_sign_bit_copies = 0;
2548 int clear_sign_bit_copies = 0;
2549 int clear_zero_bit_copies = 0;
2550 int set_zero_bit_copies = 0;
2552 unsigned HOST_WIDE_INT temp1, temp2;
2553 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2554 int step_size = TARGET_ARM ? 2 : 1;
2556 /* Find out which operations are safe for a given CODE. Also do a quick
2557 check for degenerate cases; these can occur when DImode operations
2558 are split. */
2568 can_negate_initial = 1;
2572 if (remainder == 0xffffffff)
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target,
2577 GEN_INT (ARM_SIGN_EXTEND (val))));
2583 if (reload_completed && rtx_equal_p (target, source))
2587 emit_constant_insn (cond,
2588 gen_rtx_SET (VOIDmode, target, source));
2600 emit_constant_insn (cond,
2601 gen_rtx_SET (VOIDmode, target, const0_rtx));
2604 if (remainder == 0xffffffff)
2606 if (reload_completed && rtx_equal_p (target, source))
2609 emit_constant_insn (cond,
2610 gen_rtx_SET (VOIDmode, target, source));
2619 if (reload_completed && rtx_equal_p (target, source))
2622 emit_constant_insn (cond,
2623 gen_rtx_SET (VOIDmode, target, source));
2627 if (remainder == 0xffffffff)
2630 emit_constant_insn (cond,
2631 gen_rtx_SET (VOIDmode, target,
2632 gen_rtx_NOT (mode, source)));
2638 /* We treat MINUS as (val - source), since (source - val) is always
2639 passed as (source + (-val)). */
2643 emit_constant_insn (cond,
2644 gen_rtx_SET (VOIDmode, target,
2645 gen_rtx_NEG (mode, source)));
2648 if (const_ok_for_arm (val))
2651 emit_constant_insn (cond,
2652 gen_rtx_SET (VOIDmode, target,
2653 gen_rtx_MINUS (mode, GEN_INT (val),
2665 /* If we can do it in one insn get out quickly. */
2666 if (const_ok_for_arm (val)
2667 || (can_negate_initial && const_ok_for_arm (-val))
2668 || (can_invert && const_ok_for_arm (~val)))
2671 emit_constant_insn (cond,
2672 gen_rtx_SET (VOIDmode, target,
2674 ? gen_rtx_fmt_ee (code, mode, source,
2680 /* Calculate a few attributes that may be useful for specific
2681 optimizations. */
2682 /* Count number of leading zeros. */
2683 for (i = 31; i >= 0; i--)
2685 if ((remainder & (1 << i)) == 0)
2686 clear_sign_bit_copies++;
2691 /* Count number of leading 1's. */
2692 for (i = 31; i >= 0; i--)
2694 if ((remainder & (1 << i)) != 0)
2695 set_sign_bit_copies++;
2700 /* Count number of trailing zeros. */
2701 for (i = 0; i <= 31; i++)
2703 if ((remainder & (1 << i)) == 0)
2704 clear_zero_bit_copies++;
2709 /* Count number of trailing 1's. */
2710 for (i = 0; i <= 31; i++)
2712 if ((remainder & (1 << i)) != 0)
2713 set_zero_bit_copies++;
2721 /* See if we can use movw. */
2722 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2725 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2730 /* See if we can do this by sign_extending a constant that is known
2731 to be negative. This is a good way of doing it, since the shift
2732 may well merge into a subsequent insn. */
2733 if (set_sign_bit_copies > 1)
2735 if (const_ok_for_arm
2736 (temp1 = ARM_SIGN_EXTEND (remainder
2737 << (set_sign_bit_copies - 1))))
2741 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2742 emit_constant_insn (cond,
2743 gen_rtx_SET (VOIDmode, new_src,
2745 emit_constant_insn (cond,
2746 gen_ashrsi3 (target, new_src,
2747 GEN_INT (set_sign_bit_copies - 1)));
2751 /* For an inverted constant, we will need to set the low bits;
2752 these will be shifted out of harm's way. */
2753 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2754 if (const_ok_for_arm (~temp1))
2758 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2759 emit_constant_insn (cond,
2760 gen_rtx_SET (VOIDmode, new_src,
2762 emit_constant_insn (cond,
2763 gen_ashrsi3 (target, new_src,
2764 GEN_INT (set_sign_bit_copies - 1)));
2770 /* See if we can calculate the value as the difference between two
2771 valid immediates. */
2772 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2774 int topshift = clear_sign_bit_copies & ~1;
2776 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2777 & (0xff000000 >> topshift));
2779 /* If temp1 is zero, then that means the 9 most significant
2780 bits of remainder were 1 and we've caused it to overflow.
2781 When topshift is 0 we don't need to do anything since we
2782 can borrow from 'bit 32'. */
2783 if (temp1 == 0 && topshift != 0)
2784 temp1 = 0x80000000 >> (topshift - 1);
2786 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2788 if (const_ok_for_arm (temp2))
2792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2793 emit_constant_insn (cond,
2794 gen_rtx_SET (VOIDmode, new_src,
2796 emit_constant_insn (cond,
2797 gen_addsi3 (target, new_src,
2805 /* See if we can generate this by setting the bottom (or the top)
2806 16 bits, and then shifting these into the other half of the
2807 word. We only look for the simplest cases; to do more would cost
2808 too much. Be careful, however, not to generate this when the
2809 alternative would take fewer insns. */
2810 if (val & 0xffff0000)
2812 temp1 = remainder & 0xffff0000;
2813 temp2 = remainder & 0x0000ffff;
2815 /* Overlaps outside this range are best done using other methods. */
2816 for (i = 9; i < 24; i++)
2818 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2819 && !const_ok_for_arm (temp2))
2821 rtx new_src = (subtargets
2822 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2824 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2825 source, subtargets, generate);
2833 gen_rtx_ASHIFT (mode, source,
2840 /* Don't duplicate cases already considered. */
2841 for (i = 17; i < 24; i++)
2843 if (((temp1 | (temp1 >> i)) == remainder)
2844 && !const_ok_for_arm (temp1))
2846 rtx new_src = (subtargets
2847 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2849 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2850 source, subtargets, generate);
2855 gen_rtx_SET (VOIDmode, target,
2858 gen_rtx_LSHIFTRT (mode, source,
2869 /* If we have IOR or XOR, and the constant can be loaded in a
2870 single instruction, and we can find a temporary to put it in,
2871 then this can be done in two instructions instead of 3-4. */
2873 /* TARGET can't be NULL if SUBTARGETS is 0 */
2874 || (reload_completed && !reg_mentioned_p (target, source)))
2876 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2880 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2882 emit_constant_insn (cond,
2883 gen_rtx_SET (VOIDmode, sub,
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target,
2887 gen_rtx_fmt_ee (code, mode,
2898 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2899 and the remainder 0s, e.g. 0xfff00000)
2900 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2902 This can be done in 2 instructions by using shifts with mov or mvn.
2907 mvn r0, r0, lsr #12 */
2908 if (set_sign_bit_copies > 8
2909 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2913 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2914 rtx shift = GEN_INT (set_sign_bit_copies);
2918 gen_rtx_SET (VOIDmode, sub,
2920 gen_rtx_ASHIFT (mode,
2925 gen_rtx_SET (VOIDmode, target,
2927 gen_rtx_LSHIFTRT (mode, sub,
2934 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2936 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2938 E.g. r0 = r0 | 0xfff
2943 if (set_zero_bit_copies > 8
2944 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2948 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2949 rtx shift = GEN_INT (set_zero_bit_copies);
2953 gen_rtx_SET (VOIDmode, sub,
2955 gen_rtx_LSHIFTRT (mode,
2960 gen_rtx_SET (VOIDmode, target,
2962 gen_rtx_ASHIFT (mode, sub,
2968 /* This will never be reached for Thumb-2 because orn is a valid
2969 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2971 x = y | constant (such that ~constant is a valid constant)
2973 x = ~(~y & ~constant).
2975 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2979 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2980 emit_constant_insn (cond,
2981 gen_rtx_SET (VOIDmode, sub,
2982 gen_rtx_NOT (mode, source)));
2985 sub = gen_reg_rtx (mode);
2986 emit_constant_insn (cond,
2987 gen_rtx_SET (VOIDmode, sub,
2988 gen_rtx_AND (mode, source,
2990 emit_constant_insn (cond,
2991 gen_rtx_SET (VOIDmode, target,
2992 gen_rtx_NOT (mode, sub)));
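/* Worked example (illustrative): for x = y | 0xffffff00 the constant
is not a valid immediate, but ~0xffffff00 == 0xff is, so the three
insns emitted here are effectively

mvn t, y
and t, t, #0xff
mvn x, t

which computes ~(~y & 0xff) == y | 0xffffff00. */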
2999 /* See if two shifts will do 2 or more insns' worth of work. */
3000 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3002 HOST_WIDE_INT shift_mask = ((0xffffffff
3003 << (32 - clear_sign_bit_copies))
3006 if ((remainder | shift_mask) != 0xffffffff)
3010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3011 insns = arm_gen_constant (AND, mode, cond,
3012 remainder | shift_mask,
3013 new_src, source, subtargets, 1);
3018 rtx targ = subtargets ? NULL_RTX : target;
3019 insns = arm_gen_constant (AND, mode, cond,
3020 remainder | shift_mask,
3021 targ, source, subtargets, 0);
3027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3028 rtx shift = GEN_INT (clear_sign_bit_copies);
3030 emit_insn (gen_ashlsi3 (new_src, source, shift));
3031 emit_insn (gen_lshrsi3 (target, new_src, shift));
3037 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3039 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3041 if ((remainder | shift_mask) != 0xffffffff)
3045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047 insns = arm_gen_constant (AND, mode, cond,
3048 remainder | shift_mask,
3049 new_src, source, subtargets, 1);
3054 rtx targ = subtargets ? NULL_RTX : target;
3056 insns = arm_gen_constant (AND, mode, cond,
3057 remainder | shift_mask,
3058 targ, source, subtargets, 0);
3064 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3065 rtx shift = GEN_INT (clear_zero_bit_copies);
3067 emit_insn (gen_lshrsi3 (new_src, source, shift));
3068 emit_insn (gen_ashlsi3 (target, new_src, shift));
3080 for (i = 0; i < 32; i++)
3081 if (remainder & (1 << i))
3085 || (code != IOR && can_invert && num_bits_set > 16))
3086 remainder ^= 0xffffffff;
3087 else if (code == PLUS && num_bits_set > 16)
3088 remainder = (-remainder) & 0xffffffff;
3090 /* For XOR, if more than half the bits are set and there's a sequence
3091 of more than 8 consecutive ones in the pattern then we can XOR by the
3092 inverted constant and then invert the final result; this may save an
3093 instruction and might also lead to the final mvn being merged with
3094 some other operation. */
3095 else if (code == XOR && num_bits_set > 16
3096 && (count_insns_for_constant (remainder ^ 0xffffffff,
3098 (remainder ^ 0xffffffff))
3099 < count_insns_for_constant (remainder,
3100 find_best_start (remainder))))
3102 remainder ^= 0xffffffff;
3111 /* Now try to find a way of doing the job in either two or three
3112 instructions.
3113 We start by looking for the largest block of zeros that are aligned on
3114 a 2-bit boundary; we then fill up the temps, wrapping around to the
3115 top of the word when we drop off the bottom.
3116 In the worst case this code should produce no more than four insns.
3117 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3118 best place to start. */
3120 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3121 the same. */
3123 /* Now start emitting the insns. */
3124 i = find_best_start (remainder);
3131 if (remainder & (3 << (i - 2)))
3136 temp1 = remainder & ((0x0ff << end)
3137 | ((i < end) ? (0xff >> (32 - end)) : 0));
3138 remainder &= ~temp1;
3142 rtx new_src, temp1_rtx;
3144 if (code == SET || code == MINUS)
3146 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3147 if (can_invert && code != MINUS)
3152 if ((final_invert || remainder) && subtargets)
3153 new_src = gen_reg_rtx (mode);
3158 else if (can_negate)
3162 temp1 = trunc_int_for_mode (temp1, mode);
3163 temp1_rtx = GEN_INT (temp1);
3167 else if (code == MINUS)
3168 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3170 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3172 emit_constant_insn (cond,
3173 gen_rtx_SET (VOIDmode, new_src,
3183 else if (code == MINUS)
3189 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3190 shifts. */
3199 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3200 gen_rtx_NOT (mode, source)));
3207 /* Canonicalize a comparison so that we are more likely to recognize it.
3208 This can be done for a few constant compares, where we can make the
3209 immediate value easier to load. */
3212 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3214 enum machine_mode mode;
3215 unsigned HOST_WIDE_INT i, maxval;
3217 mode = GET_MODE (*op0);
3218 if (mode == VOIDmode)
3219 mode = GET_MODE (*op1);
3221 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3223 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3224 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3225 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3226 for GTU/LEU in Thumb mode. */
3231 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3232 available. */
3233 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3236 if (code == GT || code == LE
3237 || (!TARGET_ARM && (code == GTU || code == LEU)))
3239 /* Missing comparison. First try to use an available
3240 comparison. */
3241 if (GET_CODE (*op1) == CONST_INT)
3249 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3251 *op1 = GEN_INT (i + 1);
3252 return code == GT ? GE : LT;
3257 if (i != ~((unsigned HOST_WIDE_INT) 0)
3258 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3260 *op1 = GEN_INT (i + 1);
3261 return code == GTU ? GEU : LTU;
3269 /* If that did not work, reverse the condition. */
3273 return swap_condition (code);
3279 /* Comparisons smaller than DImode. Only adjust comparisons against
3280 an out-of-range constant. */
3281 if (GET_CODE (*op1) != CONST_INT
3282 || const_ok_for_arm (INTVAL (*op1))
3283 || const_ok_for_arm (- INTVAL (*op1)))
3297 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3299 *op1 = GEN_INT (i + 1);
3300 return code == GT ? GE : LT;
3307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3309 *op1 = GEN_INT (i - 1);
3310 return code == GE ? GT : LE;
3316 if (i != ~((unsigned HOST_WIDE_INT) 0)
3317 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3319 *op1 = GEN_INT (i + 1);
3320 return code == GTU ? GEU : LTU;
3327 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3329 *op1 = GEN_INT (i - 1);
3330 return code == GEU ? GTU : LEU;
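/* Worked example (illustrative): for "x > 0xfffff" the constant
0xfffff (20 contiguous bits) is not a valid immediate, but rewriting
GT as GE against i + 1 gives "x >= 0x100000", and 0x100000 is a
single rotated byte that loads in one insn. */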
3342 /* Define how to find the value returned by a function. */
3345 arm_function_value (const_tree type, const_tree func,
3346 bool outgoing ATTRIBUTE_UNUSED)
3348 enum machine_mode mode;
3349 int unsignedp ATTRIBUTE_UNUSED;
3350 rtx r ATTRIBUTE_UNUSED;
3352 mode = TYPE_MODE (type);
3354 if (TARGET_AAPCS_BASED)
3355 return aapcs_allocate_return_reg (mode, type, func);
3357 /* Promote integer types. */
3358 if (INTEGRAL_TYPE_P (type))
3359 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3361 /* Promotes small structs returned in a register to full-word size
3362 for big-endian AAPCS. */
3363 if (arm_return_in_msb (type))
3365 HOST_WIDE_INT size = int_size_in_bytes (type);
3366 if (size % UNITS_PER_WORD != 0)
3368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3373 return LIBCALL_VALUE (mode);
3377 libcall_eq (const void *p1, const void *p2)
3379 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3383 libcall_hash (const void *p1)
3385 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3389 add_libcall (htab_t htab, rtx libcall)
3391 *htab_find_slot (htab, libcall, INSERT) = libcall;
3395 arm_libcall_uses_aapcs_base (const_rtx libcall)
3397 static bool init_done = false;
3398 static htab_t libcall_htab;
3404 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3406 add_libcall (libcall_htab,
3407 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3408 add_libcall (libcall_htab,
3409 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3410 add_libcall (libcall_htab,
3411 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3412 add_libcall (libcall_htab,
3413 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3415 add_libcall (libcall_htab,
3416 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3417 add_libcall (libcall_htab,
3418 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3419 add_libcall (libcall_htab,
3420 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3421 add_libcall (libcall_htab,
3422 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3424 add_libcall (libcall_htab,
3425 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3426 add_libcall (libcall_htab,
3427 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3428 add_libcall (libcall_htab,
3429 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3430 add_libcall (libcall_htab,
3431 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3432 add_libcall (libcall_htab,
3433 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3434 add_libcall (libcall_htab,
3435 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3438 return libcall && htab_find (libcall_htab, libcall) != NULL;
3442 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3444 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3445 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3447 /* The following libcalls return their result in integer registers,
3448 even though they return a floating point value. */
3449 if (arm_libcall_uses_aapcs_base (libcall))
3450 return gen_rtx_REG (mode, ARG_REGISTER(1));
3454 return LIBCALL_VALUE (mode);
3457 /* Determine the amount of memory needed to store the possible return
3458 registers of an untyped call. */
3460 arm_apply_result_size (void)
3466 if (TARGET_HARD_FLOAT_ABI)
3472 if (TARGET_MAVERICK)
3475 if (TARGET_IWMMXT_ABI)
3482 /* Decide whether TYPE should be returned in memory (true)
3483 or in a register (false). FNTYPE is the type of the function making
3484 the call. */
3486 arm_return_in_memory (const_tree type, const_tree fntype)
3490 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3492 if (TARGET_AAPCS_BASED)
3494 /* Simple, non-aggregate types (i.e. not including vectors and
3495 complex) are always returned in a register (or registers).
3496 We don't care about which register here, so we can short-cut
3497 some of the detail. */
3498 if (!AGGREGATE_TYPE_P (type)
3499 && TREE_CODE (type) != VECTOR_TYPE
3500 && TREE_CODE (type) != COMPLEX_TYPE)
3503 /* Any return value that is no larger than one word can be
3504 returned in r0. */
3505 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3508 /* Check any available co-processors to see if they accept the
3509 type as a register candidate (VFP, for example, can return
3510 some aggregates in consecutive registers). These aren't
3511 available if the call is variadic. */
3512 if (aapcs_select_return_coproc (type, fntype) >= 0)
3515 /* Vector values should be returned using ARM registers, not
3516 memory (unless they're over 16 bytes, which will break since
3517 we only have four call-clobbered registers to play with). */
3518 if (TREE_CODE (type) == VECTOR_TYPE)
3519 return (size < 0 || size > (4 * UNITS_PER_WORD));
3521 /* The rest go in memory. */
3525 if (TREE_CODE (type) == VECTOR_TYPE)
3526 return (size < 0 || size > (4 * UNITS_PER_WORD));
3528 if (!AGGREGATE_TYPE_P (type)
3529 && (TREE_CODE (type) != VECTOR_TYPE))
3530 /* All simple types are returned in registers. */
3533 if (arm_abi != ARM_ABI_APCS)
3535 /* ATPCS and later return aggregate types in memory only if they are
3536 larger than a word (or are variable size). */
3537 return (size < 0 || size > UNITS_PER_WORD);
3540 /* For the arm-wince targets we choose to be compatible with Microsoft's
3541 ARM and Thumb compilers, which always return aggregates in memory. */
3543 /* All structures/unions bigger than one word are returned in memory.
3544 Also catch the case where int_size_in_bytes returns -1. In this case
3545 the aggregate is either huge or of variable size, and in either case
3546 we will want to return it via memory and not in a register. */
3547 if (size < 0 || size > UNITS_PER_WORD)
3550 if (TREE_CODE (type) == RECORD_TYPE)
3554 /* For a struct the APCS says that we only return in a register
3555 if the type is 'integer like' and every addressable element
3556 has an offset of zero. For practical purposes this means
3557 that the structure can have at most one non bit-field element
3558 and that this element must be the first one in the structure. */
3560 /* Find the first field, ignoring non FIELD_DECL things which will
3561 have been created by C++. */
3562 for (field = TYPE_FIELDS (type);
3563 field && TREE_CODE (field) != FIELD_DECL;
3564 field = DECL_CHAIN (field))
3568 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3570 /* Check that the first field is valid for returning in a register. */
3572 /* ... Floats are not allowed */
3573 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3576 /* ... Aggregates that are not themselves valid for returning in
3577 a register are not allowed. */
3578 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3581 /* Now check the remaining fields, if any. Only bitfields are allowed,
3582 since they are not addressable. */
3583 for (field = DECL_CHAIN (field);
3585 field = DECL_CHAIN (field))
3587 if (TREE_CODE (field) != FIELD_DECL)
3590 if (!DECL_BIT_FIELD_TYPE (field))
3597 if (TREE_CODE (type) == UNION_TYPE)
3601 /* Unions can be returned in registers if every element is
3602 integral, or can be returned in an integer register. */
3603 for (field = TYPE_FIELDS (type);
3605 field = DECL_CHAIN (field))
3607 if (TREE_CODE (field) != FIELD_DECL)
3610 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3613 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3619 #endif /* not ARM_WINCE */
3621 /* Return all other types in memory. */
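/* Illustrative outcomes of the APCS rules above (the non-AAPCS,
non-WinCE path, with 4-byte words):
struct { int i; } -- integer-like, returned in a register;
struct { float f; } -- floating-point member, returned in memory;
struct { short a; short b; } -- second non-bitfield member, memory. */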
3625 /* Indicate whether or not words of a double are in big-endian order. */
3628 arm_float_words_big_endian (void)
3630 if (TARGET_MAVERICK)
3633 /* For FPA, float words are always big-endian. For VFP, float words
3634 follow the memory system mode. */
3642 return (TARGET_BIG_END ? 1 : 0);
3647 const struct pcs_attribute_arg
3651 } pcs_attribute_args[] =
3653 {"aapcs", ARM_PCS_AAPCS},
3654 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3656 /* We could recognize these, but changes would be needed elsewhere
3657 * to implement them. */
3658 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3659 {"atpcs", ARM_PCS_ATPCS},
3660 {"apcs", ARM_PCS_APCS},
3662 {NULL, ARM_PCS_UNKNOWN}
3666 arm_pcs_from_attribute (tree attr)
3668 const struct pcs_attribute_arg *ptr;
3671 /* Get the value of the argument. */
3672 if (TREE_VALUE (attr) == NULL_TREE
3673 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3674 return ARM_PCS_UNKNOWN;
3676 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3678 /* Check it against the list of known arguments. */
3679 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3680 if (streq (arg, ptr->arg))
3683 /* An unrecognized PCS variant. */
3684 return ARM_PCS_UNKNOWN;
3687 /* Get the PCS variant to use for this call. TYPE is the function's type
3688 specification, DECL is the specific declaration. DECL may be null if
3689 the call could be indirect or if this is a library call. */
3691 arm_get_pcs_model (const_tree type, const_tree decl)
3693 bool user_convention = false;
3694 enum arm_pcs user_pcs = arm_pcs_default;
3699 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3702 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3703 user_convention = true;
3706 if (TARGET_AAPCS_BASED)
3708 /* Detect varargs functions. These always use the base rules
3709 (no argument is ever a candidate for a co-processor
3710 register). */
3711 bool base_rules = stdarg_p (type);
3713 if (user_convention)
3715 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3716 sorry ("non-AAPCS derived PCS variant");
3717 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3718 error ("variadic functions must use the base AAPCS variant");
3722 return ARM_PCS_AAPCS;
3723 else if (user_convention)
3725 else if (decl && flag_unit_at_a_time)
3727 /* Local functions never leak outside this compilation unit,
3728 so we are free to use whatever conventions are
3729 local. */
3730 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3731 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3733 return ARM_PCS_AAPCS_LOCAL;
3736 else if (user_convention && user_pcs != arm_pcs_default)
3737 sorry ("PCS variant");
3739 /* For everything else we use the target's default. */
3740 return arm_pcs_default;
3745 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3746 const_tree fntype ATTRIBUTE_UNUSED,
3747 rtx libcall ATTRIBUTE_UNUSED,
3748 const_tree fndecl ATTRIBUTE_UNUSED)
3750 /* Record the unallocated VFP registers. */
3751 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3752 pcum->aapcs_vfp_reg_alloc = 0;
3755 /* Walk down the type tree of TYPE counting consecutive base elements.
3756 If *MODEP is VOIDmode, then set it to the first valid floating point
3757 type. If a non-floating point type is found, or if a floating point
3758 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3759 otherwise return the count in the sub-tree. */
3761 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3763 enum machine_mode mode;
3766 switch (TREE_CODE (type))
3769 mode = TYPE_MODE (type);
3770 if (mode != DFmode && mode != SFmode)
3773 if (*modep == VOIDmode)
3782 mode = TYPE_MODE (TREE_TYPE (type));
3783 if (mode != DFmode && mode != SFmode)
3786 if (*modep == VOIDmode)
3795 /* Use V2SImode and V4SImode as representatives of all 64-bit
3796 and 128-bit vector types, whether or not those modes are
3797 supported with the present options. */
3798 size = int_size_in_bytes (type);
3811 if (*modep == VOIDmode)
3814 /* Vector modes are considered to be opaque: two vectors are
3815 equivalent for the purposes of being homogeneous aggregates
3816 if they are the same size. */
3825 tree index = TYPE_DOMAIN (type);
3827 /* Can't handle incomplete types. */
3828 if (!COMPLETE_TYPE_P(type))
3831 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3834 || !TYPE_MAX_VALUE (index)
3835 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3836 || !TYPE_MIN_VALUE (index)
3837 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3841 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3842 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3844 /* There must be no padding. */
3845 if (!host_integerp (TYPE_SIZE (type), 1)
3846 || (tree_low_cst (TYPE_SIZE (type), 1)
3847 != count * GET_MODE_BITSIZE (*modep)))
3859 /* Can't handle incomplete types. */
3860 if (!COMPLETE_TYPE_P(type))
3863 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3865 if (TREE_CODE (field) != FIELD_DECL)
3868 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3874 /* There must be no padding. */
3875 if (!host_integerp (TYPE_SIZE (type), 1)
3876 || (tree_low_cst (TYPE_SIZE (type), 1)
3877 != count * GET_MODE_BITSIZE (*modep)))
3884 case QUAL_UNION_TYPE:
3886 /* These aren't very interesting except in a degenerate case. */
3891 /* Can't handle incomplete types. */
3892 if (!COMPLETE_TYPE_P(type))
3895 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3897 if (TREE_CODE (field) != FIELD_DECL)
3900 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3903 count = count > sub_count ? count : sub_count;
3906 /* There must be no padding. */
3907 if (!host_integerp (TYPE_SIZE (type), 1)
3908 || (tree_low_cst (TYPE_SIZE (type), 1)
3909 != count * GET_MODE_BITSIZE (*modep)))
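/* Illustrative results of the walk above:
struct { float x, y, z; } -- 3 consecutive SFmode elements;
struct { double d[4]; } -- 4 consecutive DFmode elements;
struct { float f; double d; } -- mixed base modes, returns -1. */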
3922 /* Return true if PCS_VARIANT should use VFP registers. */
3924 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3926 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3928 static bool seen_thumb1_vfp = false;
3930 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3932 sorry ("Thumb-1 hard-float VFP ABI");
3933 /* sorry() is not immediately fatal, so only display this once. */
3934 seen_thumb1_vfp = true;
3940 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3943 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3944 && (TARGET_VFP_DOUBLE || !is_double));
3948 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3949 enum machine_mode mode, const_tree type,
3950 enum machine_mode *base_mode, int *count)
3952 enum machine_mode new_mode = VOIDmode;
3954 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3955 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3956 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3961 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3964 new_mode = (mode == DCmode ? DFmode : SFmode);
3966 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3968 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3970 if (ag_count > 0 && ag_count <= 4)
3979 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3982 *base_mode = new_mode;
3987 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3988 enum machine_mode mode, const_tree type)
3990 int count ATTRIBUTE_UNUSED;
3991 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3993 if (!use_vfp_abi (pcs_variant, false))
3995 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4000 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4003 if (!use_vfp_abi (pcum->pcs_variant, false))
4006 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4007 &pcum->aapcs_vfp_rmode,
4008 &pcum->aapcs_vfp_rcount);
4012 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4013 const_tree type ATTRIBUTE_UNUSED)
4015 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4016 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4019 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4020 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4022 pcum->aapcs_vfp_reg_alloc = mask << regno;
4023 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4026 int rcount = pcum->aapcs_vfp_rcount;
4028 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4032 /* Avoid using unsupported vector modes. */
4033 if (rmode == V2SImode)
4035 else if (rmode == V4SImode)
4042 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4043 for (i = 0; i < rcount; i++)
4045 rtx tmp = gen_rtx_REG (rmode,
4046 FIRST_VFP_REGNUM + regno + i * rshift);
4047 tmp = gen_rtx_EXPR_LIST
4049 GEN_INT (i * GET_MODE_SIZE (rmode)));
4050 XVECEXP (par, 0, i) = tmp;
4053 pcum->aapcs_reg = par;
4056 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
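/* Worked example (illustrative): a single double argument has
aapcs_vfp_rmode == DFmode, so shift == 2 and mask == 0x3; if s0/s1
are already taken (bits 0 and 1 clear in aapcs_vfp_regs_free), the
first regno with both bits free is 2, and the argument lands in d1
(s2/s3). */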
4063 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4064 enum machine_mode mode,
4065 const_tree type ATTRIBUTE_UNUSED)
4067 if (!use_vfp_abi (pcs_variant, false))
4070 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4073 enum machine_mode ag_mode;
4078 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4083 if (ag_mode == V2SImode)
4085 else if (ag_mode == V4SImode)
4091 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4092 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4093 for (i = 0; i < count; i++)
4095 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4096 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4097 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4098 XVECEXP (par, 0, i) = tmp;
4104 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4108 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4109 enum machine_mode mode ATTRIBUTE_UNUSED,
4110 const_tree type ATTRIBUTE_UNUSED)
4112 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4113 pcum->aapcs_vfp_reg_alloc = 0;
4117 #define AAPCS_CP(X) \
4119 aapcs_ ## X ## _cum_init, \
4120 aapcs_ ## X ## _is_call_candidate, \
4121 aapcs_ ## X ## _allocate, \
4122 aapcs_ ## X ## _is_return_candidate, \
4123 aapcs_ ## X ## _allocate_return_reg, \
4124 aapcs_ ## X ## _advance \
4127 /* Table of co-processors that can be used to pass arguments in
4128 registers. Ideally no argument should be a candidate for more than
4129 one co-processor table entry, but the table is processed in order
4130 and stops after the first match. If that entry then fails to put
4131 the argument into a co-processor register, the argument will go on
4132 the stack. */
4135 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4136 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4138 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4139 BLKmode) is a candidate for this co-processor's registers; this
4140 function should ignore any position-dependent state in
4141 CUMULATIVE_ARGS and only use call-type dependent information. */
4142 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4144 /* Return true if the argument does get a co-processor register; it
4145 should set aapcs_reg to an RTX of the register allocated as is
4146 required for a return from FUNCTION_ARG. */
4147 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4149 /* Return true if a result of mode MODE (or type TYPE if MODE is
4150 BLKmode) can be returned in this co-processor's registers. */
4151 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4153 /* Allocate and return an RTX element to hold the return type of a
4154 call; this routine must not fail and will only be called if
4155 is_return_candidate returned true with the same parameters. */
4156 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4158 /* Finish processing this argument and prepare to start processing
4159 the next one. */
4160 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4161 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4169 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4174 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4175 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4182 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4184 /* We aren't passed a decl, so we can't check that a call is local.
4185 However, it isn't clear that that would be a win anyway, since it
4186 might limit some tail-calling opportunities. */
4187 enum arm_pcs pcs_variant;
4191 const_tree fndecl = NULL_TREE;
4193 if (TREE_CODE (fntype) == FUNCTION_DECL)
4196 fntype = TREE_TYPE (fntype);
4199 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4202 pcs_variant = arm_pcs_default;
4204 if (pcs_variant != ARM_PCS_AAPCS)
4208 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4209 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4218 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4221 /* We aren't passed a decl, so we can't check that a call is local.
4222 However, it isn't clear that that would be a win anyway, since it
4223 might limit some tail-calling opportunities. */
4224 enum arm_pcs pcs_variant;
4225 int unsignedp ATTRIBUTE_UNUSED;
4229 const_tree fndecl = NULL_TREE;
4231 if (TREE_CODE (fntype) == FUNCTION_DECL)
4234 fntype = TREE_TYPE (fntype);
4237 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4240 pcs_variant = arm_pcs_default;
4242 /* Promote integer types. */
4243 if (type && INTEGRAL_TYPE_P (type))
4244 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4246 if (pcs_variant != ARM_PCS_AAPCS)
4250 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4251 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4253 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4257 /* Promotes small structs returned in a register to full-word size
4258 for big-endian AAPCS. */
4259 if (type && arm_return_in_msb (type))
4261 HOST_WIDE_INT size = int_size_in_bytes (type);
4262 if (size % UNITS_PER_WORD != 0)
4264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4269 return gen_rtx_REG (mode, R0_REGNUM);
4273 aapcs_libcall_value (enum machine_mode mode)
4275 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
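/* Illustrative example (editor's note): assuming no co-processor slot
   claims the value, aapcs_libcall_value (DImode) falls through the
   candidate loop in aapcs_allocate_return_reg and yields
   (reg:DI r0), i.e. the result lives in the core pair r0/r1.  */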
4278 /* Lay out a function argument using the AAPCS rules. The rule
4279 numbers referred to here are those in the AAPCS. */
4281 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4282 const_tree type, bool named)
4287 /* We only need to do this once per argument. */
4288 if (pcum->aapcs_arg_processed)
4291 pcum->aapcs_arg_processed = true;
4293 /* Special case: if named is false then we are handling an incoming
4294 anonymous argument which is on the stack. */
4298 /* Is this a potential co-processor register candidate? */
4299 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4301 int slot = aapcs_select_call_coproc (pcum, mode, type);
4302 pcum->aapcs_cprc_slot = slot;
4304 /* We don't have to apply any of the rules from part B of the
4305 preparation phase, these are handled elsewhere in the
4310 /* A co-processor register candidate goes either in its own
4311 class of registers or on the stack. */
4312 if (!pcum->aapcs_cprc_failed[slot])
4314 /* C1.cp - Try to allocate the argument to co-processor
4316 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4319 /* C2.cp - Put the argument on the stack and note that we
4320 can't assign any more candidates in this slot. We also
4321 need to note that we have allocated stack space, so that
4322 we won't later try to split a non-cprc candidate between
4323 core registers and the stack. */
4324 pcum->aapcs_cprc_failed[slot] = true;
4325 pcum->can_split = false;
4328 /* We didn't get a register, so this argument goes on the
4330 gcc_assert (pcum->can_split == false);
4335 /* C3 - For double-word aligned arguments, round the NCRN up to the
4336 next even number. */
4337 ncrn = pcum->aapcs_ncrn;
4338 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4341 nregs = ARM_NUM_REGS2 (mode, type);
4343 /* Sigh, this test should really assert that nregs > 0, but a GCC
4344 extension allows empty structs and then gives them empty size; it
4345 then allows such a structure to be passed by value. For some of
4346 the code below we have to pretend that such an argument has
4347 non-zero size so that we 'locate' it correctly either in
4348 registers or on the stack. */
4349 gcc_assert (nregs >= 0);
4351 nregs2 = nregs ? nregs : 1;
4353 /* C4 - Argument fits entirely in core registers. */
4354 if (ncrn + nregs2 <= NUM_ARG_REGS)
4356 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4357 pcum->aapcs_next_ncrn = ncrn + nregs;
4361 /* C5 - Some core registers left and there are no arguments already
4362 on the stack: split this argument between the remaining core
4363 registers and the stack. */
4364 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4366 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4367 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4368 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4372 /* C6 - NCRN is set to 4. */
4373 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4375 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
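/* Worked example of rules C3-C7 (editor's sketch, base AAPCS, no
   co-processor candidates), for a call f (int, double, int):

     arg 0: int     ncrn 0 -> r0, next ncrn 1
     arg 1: double  C3 rounds ncrn up to 2 -> r2/r3, next ncrn 4
     arg 2: int     C4 and C5 both fail (ncrn == 4), so C6/C7 apply
                    and the argument is placed on the stack.  */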
4379 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4380 for a call to a function whose data type is FNTYPE.
4381 For a library call, FNTYPE is NULL. */
4383 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4385 tree fndecl ATTRIBUTE_UNUSED)
4387 /* Long call handling. */
4389 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4391 pcum->pcs_variant = arm_pcs_default;
4393 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4395 if (arm_libcall_uses_aapcs_base (libname))
4396 pcum->pcs_variant = ARM_PCS_AAPCS;
4398 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4399 pcum->aapcs_reg = NULL_RTX;
4400 pcum->aapcs_partial = 0;
4401 pcum->aapcs_arg_processed = false;
4402 pcum->aapcs_cprc_slot = -1;
4403 pcum->can_split = true;
4405 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4409 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4411 pcum->aapcs_cprc_failed[i] = false;
4412 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4420 /* On the ARM, the offset starts at 0. */
4422 pcum->iwmmxt_nregs = 0;
4423 pcum->can_split = true;
4425 /* Varargs vectors are treated the same as long long.
4426 named_count avoids having to change the way arm handles 'named'. */
4427 pcum->named_count = 0;
4430 if (TARGET_REALLY_IWMMXT && fntype)
4434 for (fn_arg = TYPE_ARG_TYPES (fntype);
4436 fn_arg = TREE_CHAIN (fn_arg))
4437 pcum->named_count += 1;
4439 if (! pcum->named_count)
4440 pcum->named_count = INT_MAX;
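/* A minimal sketch (editor's illustration; the real driver lives in
   the middle end) of how these hooks cooperate for each call:

     CUMULATIVE_ARGS cum;
     arm_init_cumulative_args (&cum, fntype, libname, fndecl);
     for each argument (MODE, TYPE):
       {
         rtx reg = arm_function_arg (&cum, MODE, TYPE, true);
         ... use REG, or push to the stack when REG is NULL_RTX ...
         arm_function_arg_advance (&cum, MODE, TYPE, true);
       }
   */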
4445 /* Return true if mode/type need doubleword alignment. */
4447 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4449 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4450 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
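/* For example (editor's note, AAPCS): DImode is aligned to 64 bits,
   which exceeds the 32-bit PARM_BOUNDARY, so a long long argument
   answers true here and is steered to an even-numbered register by
   rule C3 above; a plain int answers false.  */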
4454 /* Determine where to put an argument to a function.
4455 Value is zero to push the argument on the stack,
4456 or a hard register in which to store the argument.
4458 MODE is the argument's machine mode.
4459 TYPE is the data type of the argument (as a tree).
4460 This is null for libcalls where that information may not be available.
4462 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4463 the preceding args and about the function being called.
4464 NAMED is nonzero if this argument is a named parameter
4465 (otherwise it is an extra parameter matching an ellipsis).
4467 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4468 other arguments are passed on the stack. If (NAMED == 0) (which happens
4469 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4470 defined), say it is passed on the stack (function_prologue will
4471 indeed make it pass on the stack if necessary). */
4474 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4475 const_tree type, bool named)
4479 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4480 a call insn (op3 of a call_value insn). */
4481 if (mode == VOIDmode)
4484 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4486 aapcs_layout_arg (pcum, mode, type, named);
4487 return pcum->aapcs_reg;
4490 /* Varargs vectors are treated the same as long long.
4491 named_count avoids having to change the way arm handles 'named'. */
4492 if (TARGET_IWMMXT_ABI
4493 && arm_vector_mode_supported_p (mode)
4494 && pcum->named_count > pcum->nargs + 1)
4496 if (pcum->iwmmxt_nregs <= 9)
4497 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4500 pcum->can_split = false;
4505 /* Put doubleword aligned quantities in even register pairs. */
4507 && ARM_DOUBLEWORD_ALIGN
4508 && arm_needs_doubleword_align (mode, type))
4511 /* Only allow splitting an arg between regs and memory if all preceding
4512 args were allocated to regs. For args passed by reference we only count
4513 the reference pointer. */
4514 if (pcum->can_split)
4517 nregs = ARM_NUM_REGS2 (mode, type);
4519 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4522 return gen_rtx_REG (mode, pcum->nregs);
4526 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4528 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4529 ? DOUBLEWORD_ALIGNMENT
4534 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4535 tree type, bool named)
4537 int nregs = pcum->nregs;
4539 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4541 aapcs_layout_arg (pcum, mode, type, named);
4542 return pcum->aapcs_partial;
4545 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4548 if (NUM_ARG_REGS > nregs
4549 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4551 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
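/* Worked example (editor's sketch, non-AAPCS path, argument eligible
   for splitting): with nregs == 3 and a DImode argument needing
   ARM_NUM_REGS2 == 2 registers, only r3 remains, so we report
   (4 - 3) * 4 == 4 bytes passed in a register; the other 4 bytes of
   the argument go on the stack.  */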
4556 /* Update the data in PCUM to advance over an argument
4557 of mode MODE and data type TYPE.
4558 (TYPE is null for libcalls where that information may not be available.) */
4561 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4562 const_tree type, bool named)
4564 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4566 aapcs_layout_arg (pcum, mode, type, named);
4568 if (pcum->aapcs_cprc_slot >= 0)
4570 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4572 pcum->aapcs_cprc_slot = -1;
4575 /* Generic stuff. */
4576 pcum->aapcs_arg_processed = false;
4577 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4578 pcum->aapcs_reg = NULL_RTX;
4579 pcum->aapcs_partial = 0;
4584 if (arm_vector_mode_supported_p (mode)
4585 && pcum->named_count > pcum->nargs
4586 && TARGET_IWMMXT_ABI)
4587 pcum->iwmmxt_nregs += 1;
4589 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4593 /* Variable sized types are passed by reference. This is a GCC
4594 extension to the ARM ABI. */
4597 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4598 enum machine_mode mode ATTRIBUTE_UNUSED,
4599 const_tree type, bool named ATTRIBUTE_UNUSED)
4601 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
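/* Illustrative note (editor's example): a GNU C variable-length array
   type such as 'int[n]' for a runtime 'n' has a TYPE_SIZE that is not
   an INTEGER_CST, so such objects are passed by reference; fixed-size
   aggregates, however large, are still passed by value.  */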
4604 /* Encode the current state of the #pragma [no_]long_calls. */
4607 OFF, /* No #pragma [no_]long_calls is in effect. */
4608 LONG, /* #pragma long_calls is in effect. */
4609 SHORT /* #pragma no_long_calls is in effect. */
4612 static arm_pragma_enum arm_pragma_long_calls = OFF;
4615 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4617 arm_pragma_long_calls = LONG;
4621 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4623 arm_pragma_long_calls = SHORT;
4627 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4629 arm_pragma_long_calls = OFF;
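/* A minimal usage sketch (editor's example) of the pragmas handled
   above, as they would appear in user source:

     #pragma long_calls
     void far_helper (void);      // implicitly long_call
     #pragma no_long_calls
     void near_helper (void);     // implicitly short_call
     #pragma long_calls_off
     void plain_helper (void);    // back to the default

   The attributes themselves are attached by
   arm_set_default_type_attributes below.  */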
4632 /* Handle an attribute requiring a FUNCTION_DECL;
4633 arguments as in struct attribute_spec.handler. */
4635 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4636 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4638 if (TREE_CODE (*node) != FUNCTION_DECL)
4640 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4642 *no_add_attrs = true;
4648 /* Handle an "interrupt" or "isr" attribute;
4649 arguments as in struct attribute_spec.handler. */
4651 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4656 if (TREE_CODE (*node) != FUNCTION_DECL)
4658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4660 *no_add_attrs = true;
4662 /* FIXME: the argument if any is checked for type attributes;
4663 should it be checked for decl ones? */
4667 if (TREE_CODE (*node) == FUNCTION_TYPE
4668 || TREE_CODE (*node) == METHOD_TYPE)
4670 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4672 warning (OPT_Wattributes, "%qE attribute ignored",
4674 *no_add_attrs = true;
4677 else if (TREE_CODE (*node) == POINTER_TYPE
4678 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4679 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4680 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4682 *node = build_variant_type_copy (*node);
4683 TREE_TYPE (*node) = build_type_attribute_variant
4685 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4686 *no_add_attrs = true;
4690 /* Possibly pass this attribute on from the type to a decl. */
4691 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4692 | (int) ATTR_FLAG_FUNCTION_NEXT
4693 | (int) ATTR_FLAG_ARRAY_NEXT))
4695 *no_add_attrs = true;
4696 return tree_cons (name, args, NULL_TREE);
4700 warning (OPT_Wattributes, "%qE attribute ignored",
4709 /* Handle a "pcs" attribute; arguments as in struct
4710 attribute_spec.handler. */
4712 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4713 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4715 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4717 warning (OPT_Wattributes, "%qE attribute ignored", name);
4718 *no_add_attrs = true;
4723 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4724 /* Handle the "notshared" attribute. This attribute is another way of
4725 requesting hidden visibility. ARM's compiler supports
4726 "__declspec(notshared)"; we support the same thing via an
4730 arm_handle_notshared_attribute (tree *node,
4731 tree name ATTRIBUTE_UNUSED,
4732 tree args ATTRIBUTE_UNUSED,
4733 int flags ATTRIBUTE_UNUSED,
4736 tree decl = TYPE_NAME (*node);
4740 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4741 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4742 *no_add_attrs = false;
4748 /* Return 0 if the attributes for two types are incompatible, 1 if they
4749 are compatible, and 2 if they are nearly compatible (which causes a
4750 warning to be generated). */
4752 arm_comp_type_attributes (const_tree type1, const_tree type2)
4756 /* Check for mismatch of non-default calling convention. */
4757 if (TREE_CODE (type1) != FUNCTION_TYPE)
4760 /* Check for mismatched call attributes. */
4761 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4762 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4763 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4764 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4766 /* Only bother to check if an attribute is defined. */
4767 if (l1 | l2 | s1 | s2)
4769 /* If one type has an attribute, the other must have the same attribute. */
4770 if ((l1 != l2) || (s1 != s2))
4773 /* Disallow mixed attributes. */
4774 if ((l1 & s2) || (l2 & s1))
4778 /* Check for mismatched ISR attribute. */
4779 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4781 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4782 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4784 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4791 /* Assigns default attributes to newly defined type. This is used to
4792 set short_call/long_call attributes for function types of
4793 functions defined inside corresponding #pragma scopes. */
4795 arm_set_default_type_attributes (tree type)
4797 /* Add __attribute__ ((long_call)) to all functions, when
4798 inside #pragma long_calls or __attribute__ ((short_call)),
4799 when inside #pragma no_long_calls. */
4800 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4802 tree type_attr_list, attr_name;
4803 type_attr_list = TYPE_ATTRIBUTES (type);
4805 if (arm_pragma_long_calls == LONG)
4806 attr_name = get_identifier ("long_call");
4807 else if (arm_pragma_long_calls == SHORT)
4808 attr_name = get_identifier ("short_call");
4812 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4813 TYPE_ATTRIBUTES (type) = type_attr_list;
4817 /* Return true if DECL is known to be linked into section SECTION. */
4820 arm_function_in_section_p (tree decl, section *section)
4822 /* We can only be certain about functions defined in the same
4823 compilation unit. */
4824 if (!TREE_STATIC (decl))
4827 /* Make sure that SYMBOL always binds to the definition in this
4828 compilation unit. */
4829 if (!targetm.binds_local_p (decl))
4832 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4833 if (!DECL_SECTION_NAME (decl))
4835 /* Make sure that we will not create a unique section for DECL. */
4836 if (flag_function_sections || DECL_ONE_ONLY (decl))
4840 return function_section (decl) == section;
4843 /* Return nonzero if a 32-bit "long_call" should be generated for
4844 a call from the current function to DECL. We generate a long_call
4847 a. has an __attribute__ ((long_call))
4848 or b. is within the scope of a #pragma long_calls
4849 or c. the -mlong-calls command line switch has been specified
4851 However we do not generate a long call if the function:
4853 d. has an __attribute__ ((short_call))
4854 or e. is inside the scope of a #pragma no_long_calls
4855 or f. is defined in the same section as the current function. */
4858 arm_is_long_call_p (tree decl)
4863 return TARGET_LONG_CALLS;
4865 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4866 if (lookup_attribute ("short_call", attrs))
4869 /* For "f", be conservative, and only cater for cases in which the
4870 whole of the current function is placed in the same section. */
4871 if (!flag_reorder_blocks_and_partition
4872 && TREE_CODE (decl) == FUNCTION_DECL
4873 && arm_function_in_section_p (decl, current_function_section ()))
4876 if (lookup_attribute ("long_call", attrs))
4879 return TARGET_LONG_CALLS;
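/* Illustrative example (editor's note): under the rules above

     void target (void) __attribute__ ((long_call));

   always gets the 32-bit call sequence, whereas a call to a static
   function known to end up in the caller's own section (case f) can
   stay a plain BL even when -mlong-calls is in force.  */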
4882 /* Return nonzero if it is ok to make a tail-call to DECL. */
4884 arm_function_ok_for_sibcall (tree decl, tree exp)
4886 unsigned long func_type;
4888 if (cfun->machine->sibcall_blocked)
4891 /* Never tailcall something for which we have no decl, or if we
4892 are generating code for Thumb-1. */
4893 if (decl == NULL || TARGET_THUMB1)
4896 /* The PIC register is live on entry to VxWorks PLT entries, so we
4897 must make the call before restoring the PIC register. */
4898 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4901 /* Cannot tail-call to long calls, since these are out of range of
4902 a branch instruction. */
4903 if (arm_is_long_call_p (decl))
4906 /* If we are interworking and the function is not declared static
4907 then we can't tail-call it unless we know that it exists in this
4908 compilation unit (since it might be a Thumb routine). */
4909 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4912 func_type = arm_current_func_type ();
4913 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4914 if (IS_INTERRUPT (func_type))
4917 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4919 /* Check that the return value locations are the same. For
4920 example that we aren't returning a value from the sibling in
4921 a VFP register but then need to transfer it to a core
4925 a = arm_function_value (TREE_TYPE (exp), decl, false);
4926 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4928 if (!rtx_equal_p (a, b))
4932 /* Never tailcall if function may be called with a misaligned SP. */
4933 if (IS_STACKALIGN (func_type))
4936 /* Everything else is ok. */
4941 /* Addressing mode support functions. */
4943 /* Return nonzero if X is a legitimate immediate operand when compiling
4944 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4946 legitimate_pic_operand_p (rtx x)
4948 if (GET_CODE (x) == SYMBOL_REF
4949 || (GET_CODE (x) == CONST
4950 && GET_CODE (XEXP (x, 0)) == PLUS
4951 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4957 /* Record that the current function needs a PIC register. Initialize
4958 cfun->machine->pic_reg if we have not already done so. */
4961 require_pic_register (void)
4963 /* A lot of the logic here is made obscure by the fact that this
4964 routine gets called as part of the rtx cost estimation process.
4965 We don't want those calls to affect any assumptions about the real
4966 function; and further, we can't call entry_of_function() until we
4967 start the real expansion process. */
4968 if (!crtl->uses_pic_offset_table)
4970 gcc_assert (can_create_pseudo_p ());
4971 if (arm_pic_register != INVALID_REGNUM)
4973 if (!cfun->machine->pic_reg)
4974 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4976 /* Play games to avoid marking the function as needing pic
4977 if we are being called as part of the cost-estimation
4979 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4980 crtl->uses_pic_offset_table = 1;
4986 if (!cfun->machine->pic_reg)
4987 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4989 /* Play games to avoid marking the function as needing pic
4990 if we are being called as part of the cost-estimation
4992 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4994 crtl->uses_pic_offset_table = 1;
4997 arm_load_pic_register (0UL);
5002 for (insn = seq; insn; insn = NEXT_INSN (insn))
5004 INSN_LOCATOR (insn) = prologue_locator;
5006 /* We can be called during expansion of PHI nodes, where
5007 we can't yet emit instructions directly in the final
5008 insn stream. Queue the insns on the entry edge, they will
5009 be committed after everything else is expanded. */
5010 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5017 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5019 if (GET_CODE (orig) == SYMBOL_REF
5020 || GET_CODE (orig) == LABEL_REF)
5026 gcc_assert (can_create_pseudo_p ());
5027 reg = gen_reg_rtx (Pmode);
5030 /* VxWorks does not impose a fixed gap between segments; the run-time
5031 gap can be different from the object-file gap. We therefore can't
5032 use GOTOFF unless we are absolutely sure that the symbol is in the
5033 same segment as the GOT. Unfortunately, the flexibility of linker
5034 scripts means that we can't be sure of that in general, so assume
5035 that GOTOFF is never valid on VxWorks. */
5036 if ((GET_CODE (orig) == LABEL_REF
5037 || (GET_CODE (orig) == SYMBOL_REF &&
5038 SYMBOL_REF_LOCAL_P (orig)))
5040 && !TARGET_VXWORKS_RTP)
5041 insn = arm_pic_static_addr (orig, reg);
5047 /* If this function doesn't have a pic register, create one now. */
5048 require_pic_register ();
5050 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5052 /* Make the MEM as close to a constant as possible. */
5053 mem = SET_SRC (pat);
5054 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5055 MEM_READONLY_P (mem) = 1;
5056 MEM_NOTRAP_P (mem) = 1;
5058 insn = emit_insn (pat);
5061 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5063 set_unique_reg_note (insn, REG_EQUAL, orig);
5067 else if (GET_CODE (orig) == CONST)
5071 if (GET_CODE (XEXP (orig, 0)) == PLUS
5072 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5075 /* Handle the case where we have: const (UNSPEC_TLS). */
5076 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5077 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5080 /* Handle the case where we have:
5081 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5083 if (GET_CODE (XEXP (orig, 0)) == PLUS
5084 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5085 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5087 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5093 gcc_assert (can_create_pseudo_p ());
5094 reg = gen_reg_rtx (Pmode);
5097 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5099 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5100 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5101 base == reg ? 0 : reg);
5103 if (GET_CODE (offset) == CONST_INT)
5105 /* The base register doesn't really matter, we only want to
5106 test the index for the appropriate mode. */
5107 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5109 gcc_assert (can_create_pseudo_p ());
5110 offset = force_reg (Pmode, offset);
5113 if (GET_CODE (offset) == CONST_INT)
5114 return plus_constant (base, INTVAL (offset));
5117 if (GET_MODE_SIZE (mode) > 4
5118 && (GET_MODE_CLASS (mode) == MODE_INT
5119 || TARGET_SOFT_FLOAT))
5121 emit_insn (gen_addsi3 (reg, base, offset));
5125 return gen_rtx_PLUS (Pmode, base, offset);
5132 /* Find a spare register to use during the prolog of a function. */
5135 thumb_find_work_register (unsigned long pushed_regs_mask)
5139 /* Check the argument registers first as these are call-used. The
5140 register allocation order means that sometimes r3 might be used
5141 but earlier argument registers might not, so check them all. */
5142 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5143 if (!df_regs_ever_live_p (reg))
5146 /* Before going on to check the call-saved registers we can try a couple
5147 more ways of deducing that r3 is available. The first is when we are
5148 pushing anonymous arguments onto the stack and we have less than 4
5149 registers worth of fixed arguments(*). In this case r3 will be part of
5150 the variable argument list and so we can be sure that it will be
5151 pushed right at the start of the function. Hence it will be available
5152 for the rest of the prologue.
5153 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5154 if (cfun->machine->uses_anonymous_args
5155 && crtl->args.pretend_args_size > 0)
5156 return LAST_ARG_REGNUM;
5158 /* The other case is when we have fixed arguments but less than 4 registers
5159 worth. In this case r3 might be used in the body of the function, but
5160 it is not being used to convey an argument into the function. In theory
5161 we could just check crtl->args.size to see how many bytes are
5162 being passed in argument registers, but it seems that it is unreliable.
5163 Sometimes it will have the value 0 when in fact arguments are being
5164 passed. (See testcase execute/20021111-1.c for an example). So we also
5165 check the args_info.nregs field as well. The problem with this field is
5166 that it makes no allowances for arguments that are passed to the
5167 function but which are not used. Hence we could miss an opportunity
5168 when a function has an unused argument in r3. But it is better to be
5169 safe than to be sorry. */
5170 if (! cfun->machine->uses_anonymous_args
5171 && crtl->args.size >= 0
5172 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5173 && crtl->args.info.nregs < 4)
5174 return LAST_ARG_REGNUM;
5176 /* Otherwise look for a call-saved register that is going to be pushed. */
5177 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5178 if (pushed_regs_mask & (1 << reg))
5183 /* Thumb-2 can use high regs. */
5184 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5185 if (pushed_regs_mask & (1 << reg))
5188 /* Something went wrong - thumb_compute_save_reg_mask()
5189 should have arranged for a suitable register to be pushed. */
5193 static GTY(()) int pic_labelno;
5195 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5199 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5201 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5203 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5206 gcc_assert (flag_pic);
5208 pic_reg = cfun->machine->pic_reg;
5209 if (TARGET_VXWORKS_RTP)
5211 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5212 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5213 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5215 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5217 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5218 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5222 /* We use an UNSPEC rather than a LABEL_REF because this label
5223 never appears in the code stream. */
5225 labelno = GEN_INT (pic_labelno++);
5226 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5227 l1 = gen_rtx_CONST (VOIDmode, l1);
5229 /* On the ARM the PC register contains 'dot + 8' at the time of the
5230 addition, on the Thumb it is 'dot + 4'. */
5231 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5232 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5234 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5238 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5240 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5242 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5244 else /* TARGET_THUMB1 */
5246 if (arm_pic_register != INVALID_REGNUM
5247 && REGNO (pic_reg) > LAST_LO_REGNUM)
5249 /* We will have pushed the pic register, so we should always be
5250 able to find a work register. */
5251 pic_tmp = gen_rtx_REG (SImode,
5252 thumb_find_work_register (saved_regs));
5253 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5254 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5257 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5258 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5262 /* Need to emit this whether or not we obey regdecls,
5263 since setjmp/longjmp can cause life info to screw up. */
5267 /* Generate code to load the address of a static var when flag_pic is set. */
5269 arm_pic_static_addr (rtx orig, rtx reg)
5271 rtx l1, labelno, offset_rtx, insn;
5273 gcc_assert (flag_pic);
5275 /* We use an UNSPEC rather than a LABEL_REF because this label
5276 never appears in the code stream. */
5277 labelno = GEN_INT (pic_labelno++);
5278 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5279 l1 = gen_rtx_CONST (VOIDmode, l1);
5281 /* On the ARM the PC register contains 'dot + 8' at the time of the
5282 addition, on the Thumb it is 'dot + 4'. */
5283 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5284 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5285 UNSPEC_SYMBOL_OFFSET);
5286 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5290 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5292 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5294 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5296 else /* TARGET_THUMB1 */
5298 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5299 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5305 /* Return nonzero if X is valid as an ARM state addressing register. */
5307 arm_address_register_rtx_p (rtx x, int strict_p)
5311 if (GET_CODE (x) != REG)
5317 return ARM_REGNO_OK_FOR_BASE_P (regno);
5319 return (regno <= LAST_ARM_REGNUM
5320 || regno >= FIRST_PSEUDO_REGISTER
5321 || regno == FRAME_POINTER_REGNUM
5322 || regno == ARG_POINTER_REGNUM);
5325 /* Return TRUE if this rtx is the difference of a symbol and a label,
5326 and will reduce to a PC-relative relocation in the object file.
5327 Expressions like this can be left alone when generating PIC, rather
5328 than forced through the GOT. */
5330 pcrel_constant_p (rtx x)
5332 if (GET_CODE (x) == MINUS)
5333 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5338 /* Return true if X will surely end up in an index register after next
5341 will_be_in_index_register (const_rtx x)
5343 /* arm.md: calculate_pic_address will split this into a register. */
5344 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5347 /* Return nonzero if X is a valid ARM state address operand. */
5349 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5353 enum rtx_code code = GET_CODE (x);
5355 if (arm_address_register_rtx_p (x, strict_p))
5358 use_ldrd = (TARGET_LDRD
5360 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5362 if (code == POST_INC || code == PRE_DEC
5363 || ((code == PRE_INC || code == POST_DEC)
5364 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5365 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5367 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5368 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5369 && GET_CODE (XEXP (x, 1)) == PLUS
5370 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5372 rtx addend = XEXP (XEXP (x, 1), 1);
5374 /* Don't allow ldrd post increment by register because it's hard
5375 to fixup invalid register choices. */
5377 && GET_CODE (x) == POST_MODIFY
5378 && GET_CODE (addend) == REG)
5381 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5382 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5385 /* After reload constants split into minipools will have addresses
5386 from a LABEL_REF. */
5387 else if (reload_completed
5388 && (code == LABEL_REF
5390 && GET_CODE (XEXP (x, 0)) == PLUS
5391 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5392 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5395 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5398 else if (code == PLUS)
5400 rtx xop0 = XEXP (x, 0);
5401 rtx xop1 = XEXP (x, 1);
5403 return ((arm_address_register_rtx_p (xop0, strict_p)
5404 && ((GET_CODE (xop1) == CONST_INT
5405 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5406 || (!strict_p && will_be_in_index_register (xop1))))
5407 || (arm_address_register_rtx_p (xop1, strict_p)
5408 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5412 /* Reload currently can't handle MINUS, so disable this for now */
5413 else if (GET_CODE (x) == MINUS)
5415 rtx xop0 = XEXP (x, 0);
5416 rtx xop1 = XEXP (x, 1);
5418 return (arm_address_register_rtx_p (xop0, strict_p)
5419 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5423 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5424 && code == SYMBOL_REF
5425 && CONSTANT_POOL_ADDRESS_P (x)
5427 && symbol_mentioned_p (get_pool_constant (x))
5428 && ! pcrel_constant_p (get_pool_constant (x))))
5434 /* Return nonzero if X is a valid Thumb-2 address operand. */
5436 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5439 enum rtx_code code = GET_CODE (x);
5441 if (arm_address_register_rtx_p (x, strict_p))
5444 use_ldrd = (TARGET_LDRD
5446 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5448 if (code == POST_INC || code == PRE_DEC
5449 || ((code == PRE_INC || code == POST_DEC)
5450 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5451 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5453 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5454 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5455 && GET_CODE (XEXP (x, 1)) == PLUS
5456 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5458 /* Thumb-2 only has autoincrement by constant. */
5459 rtx addend = XEXP (XEXP (x, 1), 1);
5460 HOST_WIDE_INT offset;
5462 if (GET_CODE (addend) != CONST_INT)
5465 offset = INTVAL (addend);
5466 if (GET_MODE_SIZE (mode) <= 4)
5467 return (offset > -256 && offset < 256);
5469 return (use_ldrd && offset > -1024 && offset < 1024
5470 && (offset & 3) == 0);
5473 /* After reload constants split into minipools will have addresses
5474 from a LABEL_REF. */
5475 else if (reload_completed
5476 && (code == LABEL_REF
5478 && GET_CODE (XEXP (x, 0)) == PLUS
5479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5483 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5486 else if (code == PLUS)
5488 rtx xop0 = XEXP (x, 0);
5489 rtx xop1 = XEXP (x, 1);
5491 return ((arm_address_register_rtx_p (xop0, strict_p)
5492 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5493 || (!strict_p && will_be_in_index_register (xop1))))
5494 || (arm_address_register_rtx_p (xop1, strict_p)
5495 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5498 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5499 && code == SYMBOL_REF
5500 && CONSTANT_POOL_ADDRESS_P (x)
5502 && symbol_mentioned_p (get_pool_constant (x))
5503 && ! pcrel_constant_p (get_pool_constant (x))))
5509 /* Return nonzero if INDEX is valid for an address index operand in
5512 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5515 HOST_WIDE_INT range;
5516 enum rtx_code code = GET_CODE (index);
5518 /* Standard coprocessor addressing modes. */
5519 if (TARGET_HARD_FLOAT
5520 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5521 && (mode == SFmode || mode == DFmode
5522 || (TARGET_MAVERICK && mode == DImode)))
5523 return (code == CONST_INT && INTVAL (index) < 1024
5524 && INTVAL (index) > -1024
5525 && (INTVAL (index) & 3) == 0);
5527 /* For quad modes, we restrict the constant offset to be slightly less
5528 than what the instruction format permits. We do this because for
5529 quad mode moves, we will actually decompose them into two separate
5530 double-mode reads or writes. INDEX must therefore be a valid
5531 (double-mode) offset and so should INDEX+8. */
5532 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5533 return (code == CONST_INT
5534 && INTVAL (index) < 1016
5535 && INTVAL (index) > -1024
5536 && (INTVAL (index) & 3) == 0);
5538 /* We have no such constraint on double mode offsets, so we permit the
5539 full range of the instruction format. */
5540 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5541 return (code == CONST_INT
5542 && INTVAL (index) < 1024
5543 && INTVAL (index) > -1024
5544 && (INTVAL (index) & 3) == 0);
5546 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5547 return (code == CONST_INT
5548 && INTVAL (index) < 1024
5549 && INTVAL (index) > -1024
5550 && (INTVAL (index) & 3) == 0);
5552 if (arm_address_register_rtx_p (index, strict_p)
5553 && (GET_MODE_SIZE (mode) <= 4))
5556 if (mode == DImode || mode == DFmode)
5558 if (code == CONST_INT)
5560 HOST_WIDE_INT val = INTVAL (index);
5563 return val > -256 && val < 256;
5565 return val > -4096 && val < 4092;
5568 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5571 if (GET_MODE_SIZE (mode) <= 4
5575 || (mode == QImode && outer == SIGN_EXTEND))))
5579 rtx xiop0 = XEXP (index, 0);
5580 rtx xiop1 = XEXP (index, 1);
5582 return ((arm_address_register_rtx_p (xiop0, strict_p)
5583 && power_of_two_operand (xiop1, SImode))
5584 || (arm_address_register_rtx_p (xiop1, strict_p)
5585 && power_of_two_operand (xiop0, SImode)));
5587 else if (code == LSHIFTRT || code == ASHIFTRT
5588 || code == ASHIFT || code == ROTATERT)
5590 rtx op = XEXP (index, 1);
5592 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5593 && GET_CODE (op) == CONST_INT
5595 && INTVAL (op) <= 31);
5599 /* For ARM v4 we may be doing a sign-extend operation during the
5605 || (outer == SIGN_EXTEND && mode == QImode))
5611 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5613 return (code == CONST_INT
5614 && INTVAL (index) < range
5615 && INTVAL (index) > -range);
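/* Some concrete ARM-state forms accepted above (editor's examples):

     ldr   r0, [r1, #4095]        @ SImode, 12-bit immediate
     ldrh  r0, [r1, #255]         @ HImode on ARMv4+, 8-bit immediate
     ldr   r0, [r1, r2, lsl #2]   @ power-of-two scaled index
     vldr  d0, [r1, #1020]        @ VFP, 8-bit immediate scaled by 4
   */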
5618 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5619 index operand, i.e. 1, 2, 4 or 8. */
5621 thumb2_index_mul_operand (rtx op)
5625 if (GET_CODE (op) != CONST_INT)
5629 return (val == 1 || val == 2 || val == 4 || val == 8);
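/* E.g. (editor's note) an index of (mult (reg r1) (const_int 4))
   passes this test and corresponds to the Thumb-2 addressing form
   'ldr r0, [r2, r1, lsl #2]'.  */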
5632 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5634 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5636 enum rtx_code code = GET_CODE (index);
5638 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5639 /* Standard coprocessor addressing modes. */
5640 if (TARGET_HARD_FLOAT
5641 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5642 && (mode == SFmode || mode == DFmode
5643 || (TARGET_MAVERICK && mode == DImode)))
5644 return (code == CONST_INT && INTVAL (index) < 1024
5645 /* Thumb-2 allows only > -256 index range for its core register
5646 load/stores. Since we allow SF/DF in core registers, we have
5647 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5649 && INTVAL (index) > -256
5650 && (INTVAL (index) & 3) == 0);
5652 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5654 /* For DImode assume values will usually live in core regs
5655 and only allow LDRD addressing modes. */
5656 if (!TARGET_LDRD || mode != DImode)
5657 return (code == CONST_INT
5658 && INTVAL (index) < 1024
5659 && INTVAL (index) > -1024
5660 && (INTVAL (index) & 3) == 0);
5663 /* For quad modes, we restrict the constant offset to be slightly less
5664 than what the instruction format permits. We do this because for
5665 quad mode moves, we will actually decompose them into two separate
5666 double-mode reads or writes. INDEX must therefore be a valid
5667 (double-mode) offset and so should INDEX+8. */
5668 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5669 return (code == CONST_INT
5670 && INTVAL (index) < 1016
5671 && INTVAL (index) > -1024
5672 && (INTVAL (index) & 3) == 0);
5674 /* We have no such constraint on double mode offsets, so we permit the
5675 full range of the instruction format. */
5676 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1024
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 if (arm_address_register_rtx_p (index, strict_p)
5683 && (GET_MODE_SIZE (mode) <= 4))
5686 if (mode == DImode || mode == DFmode)
5688 if (code == CONST_INT)
5690 HOST_WIDE_INT val = INTVAL (index);
5691 /* ??? Can we assume ldrd for thumb2? */
5692 /* Thumb-2 ldrd only has reg+const addressing modes. */
5693 /* ldrd supports offsets of +-1020.
5694 However the ldr fallback does not. */
5695 return val > -256 && val < 256 && (val & 3) == 0;
5703 rtx xiop0 = XEXP (index, 0);
5704 rtx xiop1 = XEXP (index, 1);
5706 return ((arm_address_register_rtx_p (xiop0, strict_p)
5707 && thumb2_index_mul_operand (xiop1))
5708 || (arm_address_register_rtx_p (xiop1, strict_p)
5709 && thumb2_index_mul_operand (xiop0)));
5711 else if (code == ASHIFT)
5713 rtx op = XEXP (index, 1);
5715 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5716 && GET_CODE (op) == CONST_INT
5718 && INTVAL (op) <= 3);
5721 return (code == CONST_INT
5722 && INTVAL (index) < 4096
5723 && INTVAL (index) > -256);
5726 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5728 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5732 if (GET_CODE (x) != REG)
5738 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5740 return (regno <= LAST_LO_REGNUM
5741 || regno > LAST_VIRTUAL_REGISTER
5742 || regno == FRAME_POINTER_REGNUM
5743 || (GET_MODE_SIZE (mode) >= 4
5744 && (regno == STACK_POINTER_REGNUM
5745 || regno >= FIRST_PSEUDO_REGISTER
5746 || x == hard_frame_pointer_rtx
5747 || x == arg_pointer_rtx)));
5750 /* Return nonzero if x is a legitimate index register. This is the case
5751 for any base register that can access a QImode object. */
5753 thumb1_index_register_rtx_p (rtx x, int strict_p)
5755 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5758 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5760 The AP may be eliminated to either the SP or the FP, so we use the
5761 least common denominator, e.g. SImode, and offsets from 0 to 64.
5763 ??? Verify whether the above is the right approach.
5765 ??? Also, the FP may be eliminated to the SP, so perhaps that
5766 needs special handling also.
5768 ??? Look at how the mips16 port solves this problem. It probably uses
5769 better ways to solve some of these problems.
5771 Although it is not incorrect, we don't accept QImode and HImode
5772 addresses based on the frame pointer or arg pointer until the
5773 reload pass starts. This is so that eliminating such addresses
5774 into stack based ones won't produce impossible code. */
5776 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 /* ??? Not clear if this is right. Experiment. */
5779 if (GET_MODE_SIZE (mode) < 4
5780 && !(reload_in_progress || reload_completed)
5781 && (reg_mentioned_p (frame_pointer_rtx, x)
5782 || reg_mentioned_p (arg_pointer_rtx, x)
5783 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5784 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5785 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5786 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5789 /* Accept any base register. SP only in SImode or larger. */
5790 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5793 /* This is PC relative data before arm_reorg runs. */
5794 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5795 && GET_CODE (x) == SYMBOL_REF
5796 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5799 /* This is PC relative data after arm_reorg runs. */
5800 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5802 && (GET_CODE (x) == LABEL_REF
5803 || (GET_CODE (x) == CONST
5804 && GET_CODE (XEXP (x, 0)) == PLUS
5805 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5809 /* Post-inc indexing only supported for SImode and larger. */
5810 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5811 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5814 else if (GET_CODE (x) == PLUS)
5816 /* REG+REG address can be any two index registers. */
5817 /* We disallow FRAME+REG addressing since we know that FRAME
5818 will be replaced with STACK, and SP relative addressing only
5819 permits SP+OFFSET. */
5820 if (GET_MODE_SIZE (mode) <= 4
5821 && XEXP (x, 0) != frame_pointer_rtx
5822 && XEXP (x, 1) != frame_pointer_rtx
5823 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5824 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5825 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5828 /* REG+const has 5-7 bit offset for non-SP registers. */
5829 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5830 || XEXP (x, 0) == arg_pointer_rtx)
5831 && GET_CODE (XEXP (x, 1)) == CONST_INT
5832 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5835 /* REG+const has 10-bit offset for SP, but only SImode and
5836 larger is supported. */
5837 /* ??? Should probably check for DI/DFmode overflow here
5838 just like GO_IF_LEGITIMATE_OFFSET does. */
5839 else if (GET_CODE (XEXP (x, 0)) == REG
5840 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5841 && GET_MODE_SIZE (mode) >= 4
5842 && GET_CODE (XEXP (x, 1)) == CONST_INT
5843 && INTVAL (XEXP (x, 1)) >= 0
5844 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5845 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5848 else if (GET_CODE (XEXP (x, 0)) == REG
5849 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5850 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5851 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5852 && REGNO (XEXP (x, 0))
5853 <= LAST_VIRTUAL_POINTER_REGISTER))
5854 && GET_MODE_SIZE (mode) >= 4
5855 && GET_CODE (XEXP (x, 1)) == CONST_INT
5856 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5860 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5861 && GET_MODE_SIZE (mode) == 4
5862 && GET_CODE (x) == SYMBOL_REF
5863 && CONSTANT_POOL_ADDRESS_P (x)
5865 && symbol_mentioned_p (get_pool_constant (x))
5866 && ! pcrel_constant_p (get_pool_constant (x))))
5872 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5873 instruction of mode MODE. */
5875 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5877 switch (GET_MODE_SIZE (mode))
5880 return val >= 0 && val < 32;
5883 return val >= 0 && val < 64 && (val & 1) == 0;
5887 && (val + GET_MODE_SIZE (mode)) <= 128
5893 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5896 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5897 else if (TARGET_THUMB2)
5898 return thumb2_legitimate_address_p (mode, x, strict_p);
5899 else /* if (TARGET_THUMB1) */
5900 return thumb1_legitimate_address_p (mode, x, strict_p);
5903 /* Build the SYMBOL_REF for __tls_get_addr. */
5905 static GTY(()) rtx tls_get_addr_libfunc;
5908 get_tls_get_addr (void)
5910 if (!tls_get_addr_libfunc)
5911 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5912 return tls_get_addr_libfunc;
5916 arm_load_tp (rtx target)
5919 target = gen_reg_rtx (SImode);
5923 /* Can return in any reg. */
5924 emit_insn (gen_load_tp_hard (target));
5928 /* Always returned in r0. Immediately copy the result into a pseudo,
5929 otherwise other uses of r0 (e.g. setting up function arguments) may
5930 clobber the value. */
5934 emit_insn (gen_load_tp_soft ());
5936 tmp = gen_rtx_REG (SImode, 0);
5937 emit_move_insn (target, tmp);
5943 load_tls_operand (rtx x, rtx reg)
5947 if (reg == NULL_RTX)
5948 reg = gen_reg_rtx (SImode);
5950 tmp = gen_rtx_CONST (SImode, x);
5952 emit_move_insn (reg, tmp);
5958 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5960 rtx insns, label, labelno, sum;
5964 labelno = GEN_INT (pic_labelno++);
5965 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5966 label = gen_rtx_CONST (VOIDmode, label);
5968 sum = gen_rtx_UNSPEC (Pmode,
5969 gen_rtvec (4, x, GEN_INT (reloc), label,
5970 GEN_INT (TARGET_ARM ? 8 : 4)),
5972 reg = load_tls_operand (sum, reg);
5975 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5976 else if (TARGET_THUMB2)
5977 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5978 else /* TARGET_THUMB1 */
5979 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5981 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5982 Pmode, 1, reg, Pmode);
5984 insns = get_insns ();
5991 legitimize_tls_address (rtx x, rtx reg)
5993 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5994 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5998 case TLS_MODEL_GLOBAL_DYNAMIC:
5999 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6000 dest = gen_reg_rtx (Pmode);
6001 emit_libcall_block (insns, dest, ret, x);
6004 case TLS_MODEL_LOCAL_DYNAMIC:
6005 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6007 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6008 share the LDM result with other LD model accesses. */
6009 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6011 dest = gen_reg_rtx (Pmode);
6012 emit_libcall_block (insns, dest, ret, eqv);
6014 /* Load the addend. */
6015 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6017 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6018 return gen_rtx_PLUS (Pmode, dest, addend);
6020 case TLS_MODEL_INITIAL_EXEC:
6021 labelno = GEN_INT (pic_labelno++);
6022 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6023 label = gen_rtx_CONST (VOIDmode, label);
6024 sum = gen_rtx_UNSPEC (Pmode,
6025 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6026 GEN_INT (TARGET_ARM ? 8 : 4)),
6028 reg = load_tls_operand (sum, reg);
6031 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6032 else if (TARGET_THUMB2)
6033 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6036 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6037 emit_move_insn (reg, gen_const_mem (SImode, reg));
6040 tp = arm_load_tp (NULL_RTX);
6042 return gen_rtx_PLUS (Pmode, tp, reg);
6044 case TLS_MODEL_LOCAL_EXEC:
6045 tp = arm_load_tp (NULL_RTX);
6047 reg = gen_rtx_UNSPEC (Pmode,
6048 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6050 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6052 return gen_rtx_PLUS (Pmode, tp, reg);
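/* Summary of the sequences built above (editor's note):

     global dynamic: materialize a GOT-relative argument and call
                     __tls_get_addr once per symbol;
     local dynamic:  one __tls_get_addr call per module (shared via
                     the REG_EQUIV above) plus a per-symbol addend;
     initial exec:   load the thread-pointer offset through the GOT
                     and add it to the thread pointer;
     local exec:     add a link-time constant offset to the thread
                     pointer.  */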
6059 /* Try machine-dependent ways of modifying an illegitimate address
6060 to be legitimate. If we find one, return the new, valid address. */
6062 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6066 /* TODO: legitimize_address for Thumb2. */
6069 return thumb_legitimize_address (x, orig_x, mode);
6072 if (arm_tls_symbol_p (x))
6073 return legitimize_tls_address (x, NULL_RTX);
6075 if (GET_CODE (x) == PLUS)
6077 rtx xop0 = XEXP (x, 0);
6078 rtx xop1 = XEXP (x, 1);
6080 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6081 xop0 = force_reg (SImode, xop0);
6083 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6084 xop1 = force_reg (SImode, xop1);
6086 if (ARM_BASE_REGISTER_RTX_P (xop0)
6087 && GET_CODE (xop1) == CONST_INT)
6089 HOST_WIDE_INT n, low_n;
6093 /* VFP addressing modes actually allow greater offsets, but for
6094 now we just stick with the lowest common denominator. */
6096 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6108 low_n = ((mode) == TImode ? 0
6109 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6113 base_reg = gen_reg_rtx (SImode);
6114 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6115 emit_move_insn (base_reg, val);
6116 x = plus_constant (base_reg, low_n);
6118 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6119 x = gen_rtx_PLUS (SImode, xop0, xop1);
6122 /* XXX We don't allow MINUS any more -- see comment in
6123 arm_legitimate_address_outer_p (). */
6124 else if (GET_CODE (x) == MINUS)
6126 rtx xop0 = XEXP (x, 0);
6127 rtx xop1 = XEXP (x, 1);
6129 if (CONSTANT_P (xop0))
6130 xop0 = force_reg (SImode, xop0);
6132 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6133 xop1 = force_reg (SImode, xop1);
6135 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6136 x = gen_rtx_MINUS (SImode, xop0, xop1);
6139 /* Make sure to take full advantage of the pre-indexed addressing mode
6140 with absolute addresses which often allows for the base register to
6141 be factorized for multiple adjacent memory references, and it might
6142 even allow for the minipool to be avoided entirely. */
6143 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6146 HOST_WIDE_INT mask, base, index;
6149 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6150 use an 8-bit index. So let's use a 12-bit index for SImode only and
6151 hope that arm_gen_constant will enable ldrb to use more bits. */
6152 bits = (mode == SImode) ? 12 : 8;
6153 mask = (1 << bits) - 1;
6154 base = INTVAL (x) & ~mask;
6155 index = INTVAL (x) & mask;
6156 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6158 /* It'll most probably be more efficient to generate the base
6159 with more bits set and use a negative index instead. */
6163 base_reg = force_reg (SImode, GEN_INT (base));
6164 x = plus_constant (base_reg, index);
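/* Worked example (editor's sketch): for SImode and x == 0x12345,
   bits == 12, so base == 0x12000 and index == 0x345; the base is
   materialized once in a register and the access becomes
   'ldr rd, [rbase, #0x345]', letting neighbouring absolute accesses
   share rbase.  */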
6169 /* We need to find and carefully transform any SYMBOL and LABEL
6170 references; so go back to the original address expression. */
6171 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6173 if (new_x != orig_x)
6181 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6182 to be legitimate. If we find one, return the new, valid address. */
6184 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6186 if (arm_tls_symbol_p (x))
6187 return legitimize_tls_address (x, NULL_RTX);
6189 if (GET_CODE (x) == PLUS
6190 && GET_CODE (XEXP (x, 1)) == CONST_INT
6191 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6192 || INTVAL (XEXP (x, 1)) < 0))
6194 rtx xop0 = XEXP (x, 0);
6195 rtx xop1 = XEXP (x, 1);
6196 HOST_WIDE_INT offset = INTVAL (xop1);
6198 /* Try and fold the offset into a biasing of the base register and
6199 then offsetting that. Don't do this when optimizing for space
6200 since it can cause too many CSEs. */
6201 if (optimize_size && offset >= 0
6202 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6204 HOST_WIDE_INT delta;
6207 delta = offset - (256 - GET_MODE_SIZE (mode));
6208 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6209 delta = 31 * GET_MODE_SIZE (mode);
6211 delta = offset & (~31 * GET_MODE_SIZE (mode));
6213 xop0 = force_operand (plus_constant (xop0, offset - delta),
6215 x = plus_constant (xop0, delta);
6217 else if (offset < 0 && offset > -256)
6218 /* Small negative offsets are best done with a subtract before the
6219 dereference; forcing these into a register normally takes two instructions. */
6221 x = force_operand (x, NULL_RTX);
6224 /* For the remaining cases, force the constant into a register. */
6225 xop1 = force_reg (SImode, xop1);
6226 x = gen_rtx_PLUS (SImode, xop0, xop1);
6229 else if (GET_CODE (x) == PLUS
6230 && s_register_operand (XEXP (x, 1), SImode)
6231 && !s_register_operand (XEXP (x, 0), SImode))
6233 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6235 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6240 /* We need to find and carefully transform any SYMBOL and LABEL
6241 references; so go back to the original address expression. */
6242 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6244 if (new_x != orig_x)
6252 arm_legitimize_reload_address (rtx *p,
6253 enum machine_mode mode,
6254 int opnum, int type,
6255 int ind_levels ATTRIBUTE_UNUSED)
6257 if (GET_CODE (*p) == PLUS
6258 && GET_CODE (XEXP (*p, 0)) == REG
6259 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6260 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6262 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6263 HOST_WIDE_INT low, high;
6265 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
6266 low = ((val & 0xf) ^ 0x8) - 0x8;
6267 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
6268 /* Need to be careful, -256 is not a valid offset. */
6269 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6270 else if (mode == SImode
6271 || (mode == SFmode && TARGET_SOFT_FLOAT)
6272 || ((mode == HImode || mode == QImode) && ! arm_arch4))
6273 /* Need to be careful, -4096 is not a valid offset. */
6274 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
6275 else if ((mode == HImode || mode == QImode) && arm_arch4)
6276 /* Need to be careful, -256 is not a valid offset. */
6277 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6278 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6279 && TARGET_HARD_FLOAT && TARGET_FPA)
6280 /* Need to be careful, -1024 is not a valid offset. */
6281 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
6285 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6286 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6287 - (unsigned HOST_WIDE_INT) 0x80000000);
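/* Worked example (illustrative): for an SImode access with val = 4100,
   the code above yields low = (4100 & 0xfff) = 4 and high = 4096, so the
   address becomes (base + 4096) + 4; the high part is reloaded into a
   base register and the low part remains as the load/store offset.  */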
6288 /* Check for overflow or zero */
6289 if (low == 0 || high == 0 || (high + low != val))
6292 /* Reload the high part into a base reg; leave the low part
6294 *p = gen_rtx_PLUS (GET_MODE (*p),
6295 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6298 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6299 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6300 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6308 thumb_legitimize_reload_address (rtx *x_p,
6309 enum machine_mode mode,
6310 int opnum, int type,
6311 int ind_levels ATTRIBUTE_UNUSED)
6315 if (GET_CODE (x) == PLUS
6316 && GET_MODE_SIZE (mode) < 4
6317 && REG_P (XEXP (x, 0))
6318 && XEXP (x, 0) == stack_pointer_rtx
6319 && GET_CODE (XEXP (x, 1)) == CONST_INT
6320 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6325 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6326 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6330 /* If both registers are hi-regs, then it's better to reload the
6331 entire expression rather than each register individually. That
6332 only requires one reload register rather than two. */
6333 if (GET_CODE (x) == PLUS
6334 && REG_P (XEXP (x, 0))
6335 && REG_P (XEXP (x, 1))
6336 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6337 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6342 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6343 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6350 /* Test for various thread-local symbols. */
6352 /* Return TRUE if X is a thread-local symbol. */
6355 arm_tls_symbol_p (rtx x)
6357 if (! TARGET_HAVE_TLS)
6360 if (GET_CODE (x) != SYMBOL_REF)
6363 return SYMBOL_REF_TLS_MODEL (x) != 0;
6366 /* Helper for arm_tls_referenced_p. */
6369 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6371 if (GET_CODE (*x) == SYMBOL_REF)
6372 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6374 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6375 TLS offsets, not real symbol references. */
6376 if (GET_CODE (*x) == UNSPEC
6377 && XINT (*x, 1) == UNSPEC_TLS)
6383 /* Return TRUE if X contains any TLS symbol references. */
6386 arm_tls_referenced_p (rtx x)
6388 if (! TARGET_HAVE_TLS)
6391 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6394 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6397 arm_cannot_force_const_mem (rtx x)
6401 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6403 split_const (x, &base, &offset);
6404 if (GET_CODE (base) == SYMBOL_REF
6405 && !offset_within_block_p (base, INTVAL (offset)))
6408 return arm_tls_referenced_p (x);
6411 #define REG_OR_SUBREG_REG(X) \
6412 (GET_CODE (X) == REG \
6413 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6415 #define REG_OR_SUBREG_RTX(X) \
6416 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6419 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6421 enum machine_mode mode = GET_MODE (x);
6435 return COSTS_N_INSNS (1);
6438 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6441 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6448 return COSTS_N_INSNS (2) + cycles;
6450 return COSTS_N_INSNS (1) + 16;
6453 return (COSTS_N_INSNS (1)
6454 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6455 + (GET_CODE (SET_DEST (x)) == MEM)));
6460 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6462 if (thumb_shiftable_const (INTVAL (x)))
6463 return COSTS_N_INSNS (2);
6464 return COSTS_N_INSNS (3);
6466 else if ((outer == PLUS || outer == COMPARE)
6467 && INTVAL (x) < 256 && INTVAL (x) > -256)
6469 else if ((outer == IOR || outer == XOR || outer == AND)
6470 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6471 return COSTS_N_INSNS (1);
6472 else if (outer == AND)
6475 /* This duplicates the tests in the andsi3 expander. */
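/* Illustrative example: INTVAL (x) == 0xffff is (1 << 16) - 1, so it
   matches the test below and costs 2 insns; the mask can be applied
   with a left shift followed by a right shift instead of loading the
   constant into a register.  */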
6476 for (i = 9; i <= 31; i++)
6477 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6478 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6479 return COSTS_N_INSNS (2);
6481 else if (outer == ASHIFT || outer == ASHIFTRT
6482 || outer == LSHIFTRT)
6484 return COSTS_N_INSNS (2);
6490 return COSTS_N_INSNS (3);
6508 /* XXX another guess. */
6509 /* Memory costs quite a lot for the first word, but subsequent words
6510 load at the equivalent of a single insn each. */
6511 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6512 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6517 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6523 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6524 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6530 return total + COSTS_N_INSNS (1);
6532 /* Assume a two-shift sequence. Increase the cost slightly so
6533 we prefer actual shifts over an extend operation. */
6534 return total + 1 + COSTS_N_INSNS (2);
6542 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6544 enum machine_mode mode = GET_MODE (x);
6545 enum rtx_code subcode;
6547 enum rtx_code code = GET_CODE (x);
6553 /* Memory costs quite a lot for the first word, but subsequent words
6554 load at the equivalent of a single insn each. */
6555 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6562 if (TARGET_HARD_FLOAT && mode == SFmode)
6563 *total = COSTS_N_INSNS (2);
6564 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6565 *total = COSTS_N_INSNS (4);
6567 *total = COSTS_N_INSNS (20);
6571 if (GET_CODE (XEXP (x, 1)) == REG)
6572 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6573 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6574 *total = rtx_cost (XEXP (x, 1), code, speed);
6580 *total += COSTS_N_INSNS (4);
6585 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6586 *total += rtx_cost (XEXP (x, 0), code, speed);
6589 *total += COSTS_N_INSNS (3);
6593 *total += COSTS_N_INSNS (1);
6594 /* Increase the cost of complex shifts because they aren't any faster,
6595 and reduce dual issue opportunities. */
6596 if (arm_tune_cortex_a9
6597 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6605 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6606 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6607 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6609 *total += rtx_cost (XEXP (x, 1), code, speed);
6613 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6614 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6616 *total += rtx_cost (XEXP (x, 0), code, speed);
6623 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6625 if (TARGET_HARD_FLOAT
6627 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6629 *total = COSTS_N_INSNS (1);
6630 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6631 && arm_const_double_rtx (XEXP (x, 0)))
6633 *total += rtx_cost (XEXP (x, 1), code, speed);
6637 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6638 && arm_const_double_rtx (XEXP (x, 1)))
6640 *total += rtx_cost (XEXP (x, 0), code, speed);
6646 *total = COSTS_N_INSNS (20);
6650 *total = COSTS_N_INSNS (1);
6651 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6652 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6654 *total += rtx_cost (XEXP (x, 1), code, speed);
6658 subcode = GET_CODE (XEXP (x, 1));
6659 if (subcode == ASHIFT || subcode == ASHIFTRT
6660 || subcode == LSHIFTRT
6661 || subcode == ROTATE || subcode == ROTATERT)
6663 *total += rtx_cost (XEXP (x, 0), code, speed);
6664 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6668 /* A shift as a part of RSB costs no more than RSB itself. */
6669 if (GET_CODE (XEXP (x, 0)) == MULT
6670 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6672 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6673 *total += rtx_cost (XEXP (x, 1), code, speed);
6678 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6680 *total += rtx_cost (XEXP (x, 0), code, speed);
6681 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6685 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6686 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6688 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6689 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6690 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6691 *total += COSTS_N_INSNS (1);
6699 if (code == PLUS && arm_arch6 && mode == SImode
6700 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6701 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6703 *total = COSTS_N_INSNS (1);
6704 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6706 *total += rtx_cost (XEXP (x, 1), code, speed);
6710 /* MLA: All arguments must be registers. We filter out
6711 multiplication by a power of two, so that we fall down into
6712 the code below. */
6713 if (GET_CODE (XEXP (x, 0)) == MULT
6714 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6716 /* The cost comes from the cost of the multiply. */
6720 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6722 if (TARGET_HARD_FLOAT
6724 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6726 *total = COSTS_N_INSNS (1);
6727 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6728 && arm_const_double_rtx (XEXP (x, 1)))
6730 *total += rtx_cost (XEXP (x, 0), code, speed);
6737 *total = COSTS_N_INSNS (20);
6741 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6742 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6744 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6745 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6746 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6747 *total += COSTS_N_INSNS (1);
6753 case AND: case XOR: case IOR:
6755 /* Normally the frame registers will be split into reg+const during
6756 reload, so it is a bad idea to combine them with other instructions,
6757 since then they might not be moved outside of loops. As a compromise
6758 we allow integration with ops that have a constant as their second
6759 operand. */
6760 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6761 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6762 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6763 *total = COSTS_N_INSNS (1);
6767 *total += COSTS_N_INSNS (2);
6768 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6769 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6771 *total += rtx_cost (XEXP (x, 0), code, speed);
6778 *total += COSTS_N_INSNS (1);
6779 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6780 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6782 *total += rtx_cost (XEXP (x, 0), code, speed);
6785 subcode = GET_CODE (XEXP (x, 0));
6786 if (subcode == ASHIFT || subcode == ASHIFTRT
6787 || subcode == LSHIFTRT
6788 || subcode == ROTATE || subcode == ROTATERT)
6790 *total += rtx_cost (XEXP (x, 1), code, speed);
6791 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6796 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6798 *total += rtx_cost (XEXP (x, 1), code, speed);
6799 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6803 if (subcode == UMIN || subcode == UMAX
6804 || subcode == SMIN || subcode == SMAX)
6806 *total = COSTS_N_INSNS (3);
6813 /* This should have been handled by the CPU specific routines. */
6817 if (arm_arch3m && mode == SImode
6818 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6820 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6821 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6822 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6823 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6825 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6828 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6832 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6834 if (TARGET_HARD_FLOAT
6836 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6838 *total = COSTS_N_INSNS (1);
6841 *total = COSTS_N_INSNS (2);
6847 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6848 if (mode == SImode && code == NOT)
6850 subcode = GET_CODE (XEXP (x, 0));
6851 if (subcode == ASHIFT || subcode == ASHIFTRT
6852 || subcode == LSHIFTRT
6853 || subcode == ROTATE || subcode == ROTATERT
6855 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6857 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6858 /* Register shifts cost an extra cycle. */
6859 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6860 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6869 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6871 *total = COSTS_N_INSNS (4);
6875 operand = XEXP (x, 0);
6877 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6878 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6879 && GET_CODE (XEXP (operand, 0)) == REG
6880 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6881 *total += COSTS_N_INSNS (1);
6882 *total += (rtx_cost (XEXP (x, 1), code, speed)
6883 + rtx_cost (XEXP (x, 2), code, speed));
6887 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6889 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6895 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6896 && mode == SImode && XEXP (x, 1) == const0_rtx)
6898 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6904 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6905 && mode == SImode && XEXP (x, 1) == const0_rtx)
6907 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6927 /* SCC insns. If the comparison has already been performed, they
6928 cost 2 instructions. Otherwise they need an additional comparison
6929 before them. */
6930 *total = COSTS_N_INSNS (2);
6931 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6938 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6944 *total += COSTS_N_INSNS (1);
6945 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6946 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6948 *total += rtx_cost (XEXP (x, 0), code, speed);
6952 subcode = GET_CODE (XEXP (x, 0));
6953 if (subcode == ASHIFT || subcode == ASHIFTRT
6954 || subcode == LSHIFTRT
6955 || subcode == ROTATE || subcode == ROTATERT)
6957 *total += rtx_cost (XEXP (x, 1), code, speed);
6958 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6963 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6965 *total += rtx_cost (XEXP (x, 1), code, speed);
6966 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6976 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6977 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6978 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6979 *total += rtx_cost (XEXP (x, 1), code, speed);
6983 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6985 if (TARGET_HARD_FLOAT
6987 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6989 *total = COSTS_N_INSNS (1);
6992 *total = COSTS_N_INSNS (20);
6995 *total = COSTS_N_INSNS (1);
6997 *total += COSTS_N_INSNS (3);
7003 if (GET_MODE_CLASS (mode) == MODE_INT)
7005 rtx op = XEXP (x, 0);
7006 enum machine_mode opmode = GET_MODE (op);
7009 *total += COSTS_N_INSNS (1);
7011 if (opmode != SImode)
7015 /* If !arm_arch4, we use one of the extendhisi2_mem
7016 or movhi_bytes patterns for HImode. For a QImode
7017 sign extension, we first zero-extend from memory
7018 and then perform a shift sequence. */
7019 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7020 *total += COSTS_N_INSNS (2);
7023 *total += COSTS_N_INSNS (1);
7025 /* We don't have the necessary insn, so we need to perform some
7026 other operation. */
7027 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7028 /* An and with constant 255. */
7029 *total += COSTS_N_INSNS (1);
7031 /* A shift sequence. Increase costs slightly to avoid
7032 combining two shifts into an extend operation. */
7033 *total += COSTS_N_INSNS (2) + 1;
7039 switch (GET_MODE (XEXP (x, 0)))
7046 *total = COSTS_N_INSNS (1);
7056 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7060 if (const_ok_for_arm (INTVAL (x))
7061 || const_ok_for_arm (~INTVAL (x)))
7062 *total = COSTS_N_INSNS (1);
7064 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7065 INTVAL (x), NULL_RTX,
7072 *total = COSTS_N_INSNS (3);
7076 *total = COSTS_N_INSNS (1);
7080 *total = COSTS_N_INSNS (1);
7081 *total += rtx_cost (XEXP (x, 0), code, speed);
7085 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7086 && (mode == SFmode || !TARGET_VFP_SINGLE))
7087 *total = COSTS_N_INSNS (1);
7089 *total = COSTS_N_INSNS (4);
7093 *total = COSTS_N_INSNS (4);
7098 /* Estimates the size cost of thumb1 instructions.
7099 For now most of the code is copied from thumb1_rtx_costs. We need more
7100 fine-grained tuning when we have more related test cases. */
7102 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7104 enum machine_mode mode = GET_MODE (x);
7117 return COSTS_N_INSNS (1);
7120 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7122 /* The Thumb-1 mul instruction can't operate on a constant; we must
7123 load it into a register first. */
7124 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7125 return COSTS_N_INSNS (1) + const_size;
7127 return COSTS_N_INSNS (1);
7130 return (COSTS_N_INSNS (1)
7131 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7132 + (GET_CODE (SET_DEST (x)) == MEM)));
7137 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7138 return COSTS_N_INSNS (1);
7139 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7140 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7141 return COSTS_N_INSNS (2);
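/* Illustrative example: -5 can be built as "movs rN, #5" followed by a
   negate, hence the 2-insn cost above.  */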
7142 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7143 if (thumb_shiftable_const (INTVAL (x)))
7144 return COSTS_N_INSNS (2);
7145 return COSTS_N_INSNS (3);
7147 else if ((outer == PLUS || outer == COMPARE)
7148 && INTVAL (x) < 256 && INTVAL (x) > -256)
7150 else if ((outer == IOR || outer == XOR || outer == AND)
7151 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7152 return COSTS_N_INSNS (1);
7153 else if (outer == AND)
7156 /* This duplicates the tests in the andsi3 expander. */
7157 for (i = 9; i <= 31; i++)
7158 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7159 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7160 return COSTS_N_INSNS (2);
7162 else if (outer == ASHIFT || outer == ASHIFTRT
7163 || outer == LSHIFTRT)
7165 return COSTS_N_INSNS (2);
7171 return COSTS_N_INSNS (3);
7189 /* XXX another guess. */
7190 /* Memory costs quite a lot for the first word, but subsequent words
7191 load at the equivalent of a single insn each. */
7192 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7193 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7198 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7203 /* XXX still guessing. */
7204 switch (GET_MODE (XEXP (x, 0)))
7207 return (1 + (mode == DImode ? 4 : 0)
7208 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7211 return (4 + (mode == DImode ? 4 : 0)
7212 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7215 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7226 /* RTX costs when optimizing for size. */
7228 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7231 enum machine_mode mode = GET_MODE (x);
7234 *total = thumb1_size_rtx_costs (x, code, outer_code);
7238 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7242 /* A memory access costs 1 insn if the mode is small or the address is
7243 a single register; otherwise it costs one insn per word. */
7244 if (REG_P (XEXP (x, 0)))
7245 *total = COSTS_N_INSNS (1);
7247 && GET_CODE (XEXP (x, 0)) == PLUS
7248 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7249 /* This will be split into two instructions.
7250 See arm.md:calculate_pic_address. */
7251 *total = COSTS_N_INSNS (2);
7253 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7260 /* Needs a libcall, so it costs about this. */
7261 *total = COSTS_N_INSNS (2);
7265 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7267 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7275 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7277 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7280 else if (mode == SImode)
7282 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7283 /* Slightly disparage register shifts, but not by much. */
7284 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7285 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7289 /* Needs a libcall. */
7290 *total = COSTS_N_INSNS (2);
7294 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7295 && (mode == SFmode || !TARGET_VFP_SINGLE))
7297 *total = COSTS_N_INSNS (1);
7303 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7304 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7306 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7307 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7308 || subcode1 == ROTATE || subcode1 == ROTATERT
7309 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7310 || subcode1 == ASHIFTRT)
7312 /* It's just the cost of the two operands. */
7317 *total = COSTS_N_INSNS (1);
7321 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7325 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7326 && (mode == SFmode || !TARGET_VFP_SINGLE))
7328 *total = COSTS_N_INSNS (1);
7332 /* A shift as a part of ADD costs nothing. */
7333 if (GET_CODE (XEXP (x, 0)) == MULT
7334 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7336 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7337 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7338 *total += rtx_cost (XEXP (x, 1), code, false);
7343 case AND: case XOR: case IOR:
7346 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7348 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7349 || subcode == LSHIFTRT || subcode == ASHIFTRT
7350 || (code == AND && subcode == NOT))
7352 /* It's just the cost of the two operands. */
7358 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7362 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7366 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7367 && (mode == SFmode || !TARGET_VFP_SINGLE))
7369 *total = COSTS_N_INSNS (1);
7375 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7384 if (cc_register (XEXP (x, 0), VOIDmode))
7387 *total = COSTS_N_INSNS (1);
7391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7392 && (mode == SFmode || !TARGET_VFP_SINGLE))
7393 *total = COSTS_N_INSNS (1);
7395 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7400 return arm_rtx_costs_1 (x, outer_code, total, 0);
7403 if (const_ok_for_arm (INTVAL (x)))
7404 /* A multiplication by a constant requires another instruction
7405 to load the constant to a register. */
7406 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7408 else if (const_ok_for_arm (~INTVAL (x)))
7409 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7410 else if (const_ok_for_arm (-INTVAL (x)))
7412 if (outer_code == COMPARE || outer_code == PLUS
7413 || outer_code == MINUS)
7416 *total = COSTS_N_INSNS (1);
7419 *total = COSTS_N_INSNS (2);
7425 *total = COSTS_N_INSNS (2);
7429 *total = COSTS_N_INSNS (4);
7434 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7435 cost of these slightly. */
7436 *total = COSTS_N_INSNS (1) + 1;
7440 if (mode != VOIDmode)
7441 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7443 *total = COSTS_N_INSNS (4); /* Who knows? */
7448 /* RTX costs: dispatch to the size costs when optimizing for size, else to the per-core cost hook. */
7450 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7454 return arm_size_rtx_costs (x, (enum rtx_code) code,
7455 (enum rtx_code) outer_code, total);
7457 return current_tune->rtx_costs (x, (enum rtx_code) code,
7458 (enum rtx_code) outer_code,
7462 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7463 supported on any "slowmul" cores, so it can be ignored. */
7466 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7467 int *total, bool speed)
7469 enum machine_mode mode = GET_MODE (x);
7473 *total = thumb1_rtx_costs (x, code, outer_code);
7480 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7483 *total = COSTS_N_INSNS (20);
7487 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7489 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7490 & (unsigned HOST_WIDE_INT) 0xffffffff);
7491 int cost, const_ok = const_ok_for_arm (i);
7492 int j, booth_unit_size;
7494 /* Tune as appropriate. */
7495 cost = const_ok ? 4 : 8;
7496 booth_unit_size = 2;
7497 for (j = 0; i && j < 32; j += booth_unit_size)
7499 i >>= booth_unit_size;
7500 cost++;
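/* Worked example (illustrative): multiplying by 0x55 (binary 01010101)
   leaves i non-zero for four 2-bit Booth steps, so a constant that is
   valid as an immediate costs 4 + 4 = 8, modelling the slow iterative
   multiplier.  */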
7503 *total = COSTS_N_INSNS (cost);
7504 *total += rtx_cost (XEXP (x, 0), code, speed);
7508 *total = COSTS_N_INSNS (20);
7512 return arm_rtx_costs_1 (x, outer_code, total, speed);
7517 /* RTX cost for cores with a fast multiply unit (M variants). */
7520 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7521 int *total, bool speed)
7523 enum machine_mode mode = GET_MODE (x);
7527 *total = thumb1_rtx_costs (x, code, outer_code);
7531 /* ??? should thumb2 use different costs? */
7535 /* There is no point basing this on the tuning, since it is always the
7536 fast variant if it exists at all. */
7538 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7539 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7540 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7542 *total = COSTS_N_INSNS (2);
7549 *total = COSTS_N_INSNS (5);
7553 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7555 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7556 & (unsigned HOST_WIDE_INT) 0xffffffff);
7557 int cost, const_ok = const_ok_for_arm (i);
7558 int j, booth_unit_size;
7560 /* Tune as appropriate. */
7561 cost = const_ok ? 4 : 8;
7562 booth_unit_size = 8;
7563 for (j = 0; i && j < 32; j += booth_unit_size)
7565 i >>= booth_unit_size;
7566 cost++;
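/* Illustrative example: with an 8-bit Booth unit, multiplying by 100
   (0x64) clears i after a single step, giving cost = 4 + 1 = 5; a full
   32-bit constant adds at most three more steps.  */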
7569 *total = COSTS_N_INSNS (cost);
7575 *total = COSTS_N_INSNS (4);
7579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7581 if (TARGET_HARD_FLOAT
7583 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7585 *total = COSTS_N_INSNS (1);
7590 /* Requires a lib call */
7591 *total = COSTS_N_INSNS (20);
7595 return arm_rtx_costs_1 (x, outer_code, total, speed);
7600 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7601 so it can be ignored. */
7604 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7605 int *total, bool speed)
7607 enum machine_mode mode = GET_MODE (x);
7611 *total = thumb1_rtx_costs (x, code, outer_code);
7618 if (GET_CODE (XEXP (x, 0)) != MULT)
7619 return arm_rtx_costs_1 (x, outer_code, total, speed);
7621 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7622 will stall until the multiplication is complete. */
7623 *total = COSTS_N_INSNS (3);
7627 /* There is no point basing this on the tuning, since it is always the
7628 fast variant if it exists at all. */
7630 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7631 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7632 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7634 *total = COSTS_N_INSNS (2);
7641 *total = COSTS_N_INSNS (5);
7645 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7647 /* If operand 1 is a constant we can more accurately
7648 calculate the cost of the multiply. The multiplier can
7649 retire 15 bits on the first cycle and a further 12 on the
7650 second. We do, of course, have to load the constant into
7651 a register first. */
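/* Worked example (illustrative): for a multiply by 0x12345, some bits
   above bit 14 are set (0x12345 & 0xffff8000 == 0x10000), so one extra
   cycle is added, but no bits above bit 26 are set, giving a total of
   COSTS_N_INSNS (2).  */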
7652 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7653 /* There's a general overhead of one cycle. */
7655 unsigned HOST_WIDE_INT masked_const;
7660 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7662 masked_const = i & 0xffff8000;
7663 if (masked_const != 0)
7666 masked_const = i & 0xf8000000;
7667 if (masked_const != 0)
7670 *total = COSTS_N_INSNS (cost);
7676 *total = COSTS_N_INSNS (3);
7680 /* Requires a lib call */
7681 *total = COSTS_N_INSNS (20);
7685 return arm_rtx_costs_1 (x, outer_code, total, speed);
7690 /* RTX costs for 9e (and later) cores. */
7693 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7694 int *total, bool speed)
7696 enum machine_mode mode = GET_MODE (x);
7703 *total = COSTS_N_INSNS (3);
7707 *total = thumb1_rtx_costs (x, code, outer_code);
7715 /* There is no point basing this on the tuning, since it is always the
7716 fast variant if it exists at all. */
7718 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7719 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7720 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7722 *total = COSTS_N_INSNS (2);
7729 *total = COSTS_N_INSNS (5);
7735 *total = COSTS_N_INSNS (2);
7739 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7741 if (TARGET_HARD_FLOAT
7743 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7745 *total = COSTS_N_INSNS (1);
7750 *total = COSTS_N_INSNS (20);
7754 return arm_rtx_costs_1 (x, outer_code, total, speed);
7757 /* All address computations that can be done are free, but rtx cost returns
7758 the same for practically all of them. So we weight the different types
7759 of address here in the order (most preferred first):
7760 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7762 arm_arm_address_cost (rtx x)
7764 enum rtx_code c = GET_CODE (x);
7766 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7768 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7773 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7776 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7786 arm_thumb_address_cost (rtx x)
7788 enum rtx_code c = GET_CODE (x);
7793 && GET_CODE (XEXP (x, 0)) == REG
7794 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7801 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7803 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7806 /* Adjust cost hook for XScale. */
7808 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7810 /* Some true dependencies can have a higher cost depending
7811 on precisely how certain input operands are used. */
7812 if (REG_NOTE_KIND(link) == 0
7813 && recog_memoized (insn) >= 0
7814 && recog_memoized (dep) >= 0)
7816 int shift_opnum = get_attr_shift (insn);
7817 enum attr_type attr_type = get_attr_type (dep);
7819 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7820 operand for INSN. If we have a shifted input operand and the
7821 instruction we depend on is another ALU instruction, then we may
7822 have to account for an additional stall. */
7823 if (shift_opnum != 0
7824 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7826 rtx shifted_operand;
7829 /* Get the shifted operand. */
7830 extract_insn (insn);
7831 shifted_operand = recog_data.operand[shift_opnum];
7833 /* Iterate over all the operands in DEP. If we write an operand
7834 that overlaps with SHIFTED_OPERAND, then we have to increase the
7835 cost of this dependency. */
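/* For instance (illustrative), if INSN is "add r0, r1, r2, lsl #2" and
   DEP is an ALU instruction writing r2, the shifted input is not ready
   in time and the dependence costs an extra cycle.  */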
7837 preprocess_constraints ();
7838 for (opno = 0; opno < recog_data.n_operands; opno++)
7840 /* We can ignore strict inputs. */
7841 if (recog_data.operand_type[opno] == OP_IN)
7844 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7856 /* Adjust cost hook for Cortex A9. */
7858 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7860 switch (REG_NOTE_KIND (link))
7867 case REG_DEP_OUTPUT:
7868 if (recog_memoized (insn) >= 0
7869 && recog_memoized (dep) >= 0)
7871 if (GET_CODE (PATTERN (insn)) == SET)
7874 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7876 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7878 enum attr_type attr_type_insn = get_attr_type (insn);
7879 enum attr_type attr_type_dep = get_attr_type (dep);
7881 /* By default all dependencies of the form
7882 s0 = s0 <op> s1
7883 s0 = s0 <op> s2
7884 have an extra latency of 1 cycle because
7885 of the input and output dependency in this
7886 case. However this gets modeled as a true
7887 dependency and hence all these checks. */
7888 if (REG_P (SET_DEST (PATTERN (insn)))
7889 && REG_P (SET_DEST (PATTERN (dep)))
7890 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7891 SET_DEST (PATTERN (dep))))
7893 /* FMACS is a special case where the dependent
7894 instruction can be issued 3 cycles before
7895 the normal latency in case of an output
7896 dependency. */
7897 if ((attr_type_insn == TYPE_FMACS
7898 || attr_type_insn == TYPE_FMACD)
7899 && (attr_type_dep == TYPE_FMACS
7900 || attr_type_dep == TYPE_FMACD))
7902 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7903 *cost = insn_default_latency (dep) - 3;
7905 *cost = insn_default_latency (dep);
7910 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7911 *cost = insn_default_latency (dep) + 1;
7913 *cost = insn_default_latency (dep);
7929 /* Adjust cost hook for FA726TE. */
7931 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7933 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
7934 predicated) has a penalty of 3. */
7935 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
7936 && recog_memoized (insn) >= 0
7937 && recog_memoized (dep) >= 0
7938 && get_attr_conds (dep) == CONDS_SET)
7940 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
7941 if (get_attr_conds (insn) == CONDS_USE
7942 && get_attr_type (insn) != TYPE_BRANCH)
7948 if (GET_CODE (PATTERN (insn)) == COND_EXEC
7949 || get_attr_conds (insn) == CONDS_USE)
7959 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7960 It corrects the value of COST based on the relationship between
7961 INSN and DEP through the dependence LINK. It returns the new
7962 value. There is a per-core adjust_cost hook to adjust scheduler costs
7963 and the per-core hook can choose to completely override the generic
7964 adjust_cost function. Only put bits of code into arm_adjust_cost that
7965 are common across all cores. */
7967 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7971 /* When generating Thumb-1 code, we want to place flag-setting operations
7972 close to a conditional branch which depends on them, so that we can
7973 omit the comparison. */
7975 && REG_NOTE_KIND (link) == 0
7976 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7977 && recog_memoized (dep) >= 0
7978 && get_attr_conds (dep) == CONDS_SET)
7981 if (current_tune->sched_adjust_cost != NULL)
7983 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7987 /* XXX This is not strictly true for the FPA. */
7988 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7989 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7992 /* Call insns don't incur a stall, even if they follow a load. */
7993 if (REG_NOTE_KIND (link) == 0
7994 && GET_CODE (insn) == CALL_INSN)
7997 if ((i_pat = single_set (insn)) != NULL
7998 && GET_CODE (SET_SRC (i_pat)) == MEM
7999 && (d_pat = single_set (dep)) != NULL
8000 && GET_CODE (SET_DEST (d_pat)) == MEM)
8002 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8003 /* This is a load after a store; there is no conflict if the load reads
8004 from a cached area. Assume that loads from the stack, and from the
8005 constant pool are cached, and that others will miss. This is a
8006 crude approximation. */
8008 if ((GET_CODE (src_mem) == SYMBOL_REF
8009 && CONSTANT_POOL_ADDRESS_P (src_mem))
8010 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8011 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8012 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8019 static int fp_consts_inited = 0;
8021 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8022 static const char * const strings_fp[8] =
8025 "4", "5", "0.5", "10"
8028 static REAL_VALUE_TYPE values_fp[8];
8031 init_fp_table (void)
8036 if (TARGET_VFP)
8037 fp_consts_inited = 1;
8038 else
8039 fp_consts_inited = 8;
8041 for (i = 0; i < fp_consts_inited; i++)
8043 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8048 /* Return TRUE if rtx X is a valid immediate FP constant. */
8050 arm_const_double_rtx (rtx x)
8055 if (!fp_consts_inited)
8058 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8059 if (REAL_VALUE_MINUS_ZERO (r))
8062 for (i = 0; i < fp_consts_inited; i++)
8063 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8069 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8071 neg_const_double_rtx_ok_for_fpa (rtx x)
8076 if (!fp_consts_inited)
8079 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8080 r = real_value_negate (&r);
8081 if (REAL_VALUE_MINUS_ZERO (r))
8084 for (i = 0; i < 8; i++)
8085 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8092 /* VFPv3 has a fairly wide range of representable immediates, formed from
8093 "quarter-precision" floating-point values. These can be evaluated using this
8094 formula (with ^ for exponentiation):
8096 (-1)^s * n * 2^-r
8098 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8099 16 <= n <= 31 and 0 <= r <= 7.
8101 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8103 - A (most-significant) is the sign bit.
8104 - BCD are the exponent (encoded as r XOR 3).
8105 - EFGH are the mantissa (encoded as n - 16).
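/* Worked encoding example (illustrative): 1.0 = (-1)^0 * 16 * 2^-4,
   i.e. s = 0, n = 16, r = 4, which maps to the 8-bit pattern
   0 111 0000 (0x70): the exponent field is r XOR 3 = 7 and the
   mantissa field is n - 16 = 0.  */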
8108 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8109 fconst[sd] instruction, or -1 if X isn't suitable. */
8111 vfp3_const_double_index (rtx x)
8113 REAL_VALUE_TYPE r, m;
8115 unsigned HOST_WIDE_INT mantissa, mant_hi;
8116 unsigned HOST_WIDE_INT mask;
8117 HOST_WIDE_INT m1, m2;
8118 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8120 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8123 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8125 /* We can't represent these things, so detect them first. */
8126 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8129 /* Extract sign, exponent and mantissa. */
8130 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8131 r = real_value_abs (&r);
8132 exponent = REAL_EXP (&r);
8133 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8134 highest (sign) bit, with a fixed binary point at bit point_pos.
8135 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8136 bits for the mantissa, this may fail (low bits would be lost). */
8137 real_ldexp (&m, &r, point_pos - exponent);
8138 REAL_VALUE_TO_INT (&m1, &m2, m);
8142 /* If there are bits set in the low part of the mantissa, we can't
8143 represent this value. */
8147 /* Now make it so that mantissa contains the most-significant bits, and move
8148 the point_pos to indicate that the least-significant bits have been
8150 point_pos -= HOST_BITS_PER_WIDE_INT;
8153 /* We can permit four significant bits of mantissa only, plus a high bit
8154 which is always 1. */
8155 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8156 if ((mantissa & mask) != 0)
8159 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8160 mantissa >>= point_pos - 5;
8162 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8163 floating-point immediate zero with Neon using an integer-zero load, but
8164 that case is handled elsewhere.) */
8168 gcc_assert (mantissa >= 16 && mantissa <= 31);
8170 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8171 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8172 left 4 places at this point relative to normalized IEEE754 values). GCC
8173 internally uses [0.5, 1) (see real.c), so the exponent returned from
8174 REAL_EXP must be altered. */
8175 exponent = 5 - exponent;
8177 if (exponent < 0 || exponent > 7)
8180 /* Sign, mantissa and exponent are now in the correct form to plug into the
8181 formula described in the comment above. */
8182 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8185 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8187 vfp3_const_double_rtx (rtx x)
8192 return vfp3_const_double_index (x) != -1;
8195 /* Recognize immediates which can be used in various Neon instructions. Legal
8196 immediates are described by the following table (for VMVN variants, the
8197 bitwise inverse of the constant shown is recognized. In either case, VMOV
8198 is output and the correct instruction to use for a given constant is chosen
8199 by the assembler). The constant shown is replicated across all elements of
8200 the destination vector.
8202 insn elems variant constant (binary)
8203 ---- ----- ------- -----------------
8204 vmov i32 0 00000000 00000000 00000000 abcdefgh
8205 vmov i32 1 00000000 00000000 abcdefgh 00000000
8206 vmov i32 2 00000000 abcdefgh 00000000 00000000
8207 vmov i32 3 abcdefgh 00000000 00000000 00000000
8208 vmov i16 4 00000000 abcdefgh
8209 vmov i16 5 abcdefgh 00000000
8210 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8211 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8212 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8213 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8214 vmvn i16 10 00000000 abcdefgh
8215 vmvn i16 11 abcdefgh 00000000
8216 vmov i32 12 00000000 00000000 abcdefgh 11111111
8217 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8218 vmov i32 14 00000000 abcdefgh 11111111 11111111
8219 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8220 vmov i8 16 abcdefgh
8221 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8222 eeeeeeee ffffffff gggggggg hhhhhhhh
8223 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8225 For case 18, B = !b. Representable values are exactly those accepted by
8226 vfp3_const_double_index, but are output as floating-point numbers rather
8227 than integers.
8229 Variants 0-5 (inclusive) may also be used as immediates for the second
8230 operand of VORR/VBIC instructions.
8232 The INVERSE argument causes the bitwise inverse of the given operand to be
8233 recognized instead (used for recognizing legal immediates for the VAND/VORN
8234 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8235 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8236 output, rather than the real insns vbic/vorr).
8238 INVERSE makes no difference to the recognition of float vectors.
8240 The return value is the variant of immediate as shown in the above table, or
8241 -1 if the given value doesn't match any of the listed patterns.
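/* For example (illustrative), a V4SImode vector with every element equal
   to 0x00ab0000 matches variant 2 above: *MODCONST is set to that value,
   *ELEMENTWIDTH to 32, and the constant is output for a vmov.i32.  */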
8244 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8245 rtx *modconst, int *elementwidth)
8247 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8248 matches = 1; \
8249 for (i = 0; i < idx; i += (STRIDE)) \
8250 if (!(TEST)) \
8251 matches = 0; \
8252 if (matches) \
8253 { \
8254 immtype = (CLASS); \
8255 elsize = (ELSIZE); \
8256 break; \
8257 }
8259 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8260 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8261 unsigned char bytes[16];
8262 int immtype = -1, matches;
8263 unsigned int invmask = inverse ? 0xff : 0;
8265 /* Vectors of float constants. */
8266 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8268 rtx el0 = CONST_VECTOR_ELT (op, 0);
8271 if (!vfp3_const_double_rtx (el0))
8274 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8276 for (i = 1; i < n_elts; i++)
8278 rtx elt = CONST_VECTOR_ELT (op, i);
8281 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8283 if (!REAL_VALUES_EQUAL (r0, re))
8288 *modconst = CONST_VECTOR_ELT (op, 0);
8296 /* Splat vector constant out into a byte vector. */
8297 for (i = 0; i < n_elts; i++)
8299 rtx el = CONST_VECTOR_ELT (op, i);
8300 unsigned HOST_WIDE_INT elpart;
8301 unsigned int part, parts;
8303 if (GET_CODE (el) == CONST_INT)
8305 elpart = INTVAL (el);
8308 else if (GET_CODE (el) == CONST_DOUBLE)
8310 elpart = CONST_DOUBLE_LOW (el);
8316 for (part = 0; part < parts; part++)
8319 for (byte = 0; byte < innersize; byte++)
8321 bytes[idx++] = (elpart & 0xff) ^ invmask;
8322 elpart >>= BITS_PER_UNIT;
8324 if (GET_CODE (el) == CONST_DOUBLE)
8325 elpart = CONST_DOUBLE_HIGH (el);
8330 gcc_assert (idx == GET_MODE_SIZE (mode));
8334 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8335 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8337 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8338 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8340 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8341 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8343 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8344 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8346 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8348 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8350 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8351 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8353 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8354 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8356 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8357 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8359 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8360 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8362 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8364 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8366 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8367 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8369 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8370 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8372 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8373 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8375 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8376 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8378 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8380 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8381 && bytes[i] == bytes[(i + 8) % idx]);
8389 *elementwidth = elsize;
8393 unsigned HOST_WIDE_INT imm = 0;
8395 /* Un-invert bytes of recognized vector, if necessary. */
8397 for (i = 0; i < idx; i++)
8398 bytes[i] ^= invmask;
8402 /* FIXME: Broken on 32-bit H_W_I hosts. */
8403 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8405 for (i = 0; i < 8; i++)
8406 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8407 << (i * BITS_PER_UNIT);
8409 *modconst = GEN_INT (imm);
8413 unsigned HOST_WIDE_INT imm = 0;
8415 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8416 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8418 *modconst = GEN_INT (imm);
8426 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8427 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8428 float elements), and a modified constant (whatever should be output for a
8429 VMOV) in *MODCONST. */
8432 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8433 rtx *modconst, int *elementwidth)
8437 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8443 *modconst = tmpconst;
8446 *elementwidth = tmpwidth;
8451 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8452 the immediate is valid, write a constant suitable for using as an operand
8453 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8454 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8457 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8458 rtx *modconst, int *elementwidth)
8462 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8464 if (retval < 0 || retval > 5)
8468 *modconst = tmpconst;
8471 *elementwidth = tmpwidth;
8476 /* Return a string suitable for output of Neon immediate logic operation
8477 MNEM. */
8480 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8481 int inverse, int quad)
8483 int width, is_valid;
8484 static char templ[40];
8486 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8488 gcc_assert (is_valid != 0);
8491 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8493 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8498 /* Output a sequence of pairwise operations to implement a reduction.
8499 NOTE: We do "too much work" here, because pairwise operations work on two
8500 registers-worth of operands in one go. Unfortunately I don't think we
8501 can exploit those extra calculations to do the full operation in fewer steps.
8502 Although all vector elements of the result but the first are ignored, we
8503 actually calculate the same result in each of the elements. An alternative
8504 such as initially loading a vector with zero to use as each of the second
8505 operands would use up an additional register and take an extra instruction,
8506 for no particular gain. */
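/* Sketch of the expansion (illustrative): reducing a 4-element vector
   {a, b, c, d} takes two pairwise steps,
     step 1: {a+b, c+d, a+b, c+d}
     step 2: {a+b+c+d, ...}
   with the loop below halving the element count each time and writing
   the final step directly to OP0.  */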
8509 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8510 rtx (*reduc) (rtx, rtx, rtx))
8512 enum machine_mode inner = GET_MODE_INNER (mode);
8513 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8516 for (i = parts / 2; i >= 1; i /= 2)
8518 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8519 emit_insn (reduc (dest, tmpsum, tmpsum));
8524 /* If VALS is a vector constant that can be loaded into a register
8525 using VDUP, generate instructions to do so and return an RTX to
8526 assign to the register. Otherwise return NULL_RTX. */
8529 neon_vdup_constant (rtx vals)
8531 enum machine_mode mode = GET_MODE (vals);
8532 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8533 int n_elts = GET_MODE_NUNITS (mode);
8534 bool all_same = true;
8538 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8541 for (i = 0; i < n_elts; ++i)
8543 x = XVECEXP (vals, 0, i);
8544 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8549 /* The elements are not all the same. We could handle repeating
8550 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8551 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8552 vdup.i16). */
8555 /* We can load this constant by using VDUP and a constant in a
8556 single ARM register. This will be cheaper than a vector
8557 load. */
8559 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8560 return gen_rtx_VEC_DUPLICATE (mode, x);
8563 /* Generate code to load VALS, which is a PARALLEL containing only
8564 constants (for vec_init) or CONST_VECTOR, efficiently into a
8565 register. Returns an RTX to copy into the register, or NULL_RTX
8566 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8569 neon_make_constant (rtx vals)
8571 enum machine_mode mode = GET_MODE (vals);
8573 rtx const_vec = NULL_RTX;
8574 int n_elts = GET_MODE_NUNITS (mode);
8578 if (GET_CODE (vals) == CONST_VECTOR)
8580 else if (GET_CODE (vals) == PARALLEL)
8582 /* A CONST_VECTOR must contain only CONST_INTs and
8583 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8584 Only store valid constants in a CONST_VECTOR. */
8585 for (i = 0; i < n_elts; ++i)
8587 rtx x = XVECEXP (vals, 0, i);
8588 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8591 if (n_const == n_elts)
8592 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8597 if (const_vec != NULL
8598 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8599 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8601 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8602 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8603 pipeline cycle; creating the constant takes one or two ARM
8606 else if (const_vec != NULL_RTX)
8607 /* Load from constant pool. On Cortex-A8 this takes two cycles
8608 (for either double or quad vectors). We cannot take advantage
8609 of single-cycle VLD1 because we need a PC-relative addressing
8610 mode. */
8613 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8614 We cannot construct an initializer. */
8618 /* Initialize vector TARGET to VALS. */
8621 neon_expand_vector_init (rtx target, rtx vals)
8623 enum machine_mode mode = GET_MODE (target);
8624 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8625 int n_elts = GET_MODE_NUNITS (mode);
8626 int n_var = 0, one_var = -1;
8627 bool all_same = true;
8631 for (i = 0; i < n_elts; ++i)
8633 x = XVECEXP (vals, 0, i);
8634 if (!CONSTANT_P (x))
8635 ++n_var, one_var = i;
8637 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8643 rtx constant = neon_make_constant (vals);
8644 if (constant != NULL_RTX)
8646 emit_move_insn (target, constant);
8651 /* Splat a single non-constant element if we can. */
8652 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8654 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8655 emit_insn (gen_rtx_SET (VOIDmode, target,
8656 gen_rtx_VEC_DUPLICATE (mode, x)));
8660 /* One field is non-constant. Load constant then overwrite varying
8661 field. This is more efficient than using the stack. */
8664 rtx copy = copy_rtx (vals);
8665 rtx index = GEN_INT (one_var);
8667 /* Load constant part of vector, substitute neighboring value for
8668 varying element. */
8669 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8670 neon_expand_vector_init (target, copy);
8672 /* Insert variable. */
8673 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8677 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8680 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8683 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8686 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8689 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8692 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8695 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8698 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8701 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8709 /* Construct the vector in memory one field at a time
8710 and load the whole vector. */
8711 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8712 for (i = 0; i < n_elts; i++)
8713 emit_move_insn (adjust_address_nv (mem, inner_mode,
8714 i * GET_MODE_SIZE (inner_mode)),
8715 XVECEXP (vals, 0, i));
8716 emit_move_insn (target, mem);
8719 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8720 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8721 reported source locations are bogus. */
8724 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8729 gcc_assert (GET_CODE (operand) == CONST_INT);
8731 lane = INTVAL (operand);
8733 if (lane < low || lane >= high)
8737 /* Bounds-check lanes. */
8740 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8742 bounds_check (operand, low, high, "lane out of range");
8745 /* Bounds-check constants. */
8748 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8750 bounds_check (operand, low, high, "constant out of range");
8754 neon_element_bits (enum machine_mode mode)
8756 if (mode == DImode)
8757 return GET_MODE_BITSIZE (mode);
8758 else
8759 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8763 /* Predicates for `match_operand' and `match_operator'. */
8765 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8767 cirrus_memory_offset (rtx op)
8769 /* Reject eliminable registers. */
8770 if (! (reload_in_progress || reload_completed)
8771 && ( reg_mentioned_p (frame_pointer_rtx, op)
8772 || reg_mentioned_p (arg_pointer_rtx, op)
8773 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8774 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8775 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8776 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8779 if (GET_CODE (op) == MEM)
8785 /* Match: (mem (reg)). */
8786 if (GET_CODE (ind) == REG)
8792 if (GET_CODE (ind) == PLUS
8793 && GET_CODE (XEXP (ind, 0)) == REG
8794 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8795 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8802 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8803 WB is true if full writeback address modes are allowed and is false
8804 if limited writeback address modes (POST_INC and PRE_DEC) are
8805 allowed. */
8808 arm_coproc_mem_operand (rtx op, bool wb)
8812 /* Reject eliminable registers. */
8813 if (! (reload_in_progress || reload_completed)
8814 && ( reg_mentioned_p (frame_pointer_rtx, op)
8815 || reg_mentioned_p (arg_pointer_rtx, op)
8816 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8817 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8818 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8819 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8822 /* Constants are converted into offsets from labels. */
8823 if (GET_CODE (op) != MEM)
8828 if (reload_completed
8829 && (GET_CODE (ind) == LABEL_REF
8830 || (GET_CODE (ind) == CONST
8831 && GET_CODE (XEXP (ind, 0)) == PLUS
8832 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8833 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8836 /* Match: (mem (reg)). */
8837 if (GET_CODE (ind) == REG)
8838 return arm_address_register_rtx_p (ind, 0);
8840 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8841 acceptable in any case (subject to verification by
8842 arm_address_register_rtx_p). We need WB to be true to accept
8843 PRE_INC and POST_DEC. */
8844 if (GET_CODE (ind) == POST_INC
8845 || GET_CODE (ind) == PRE_DEC
8847 && (GET_CODE (ind) == PRE_INC
8848 || GET_CODE (ind) == POST_DEC)))
8849 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8852 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8853 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8854 && GET_CODE (XEXP (ind, 1)) == PLUS
8855 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8856 ind = XEXP (ind, 1);
8861 if (GET_CODE (ind) == PLUS
8862 && GET_CODE (XEXP (ind, 0)) == REG
8863 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8864 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8865 && INTVAL (XEXP (ind, 1)) > -1024
8866 && INTVAL (XEXP (ind, 1)) < 1024
8867 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
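/* The offset test above corresponds to the coprocessor addressing mode
   [Rn, #+/-imm], where the immediate must be a multiple of 4 in the
   range -1020..+1020: e.g. (mem (plus (reg r5) (const_int 1020))) is
   accepted, whereas offsets of 1022 (misaligned) or 1024 (out of
   range) are rejected.  */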
8873 /* Return TRUE if OP is a memory operand which we can load or store a vector
8874 to/from. TYPE is one of the following values:
     0 - Vector load/store (vldr)
     1 - Core registers (ldm)
     2 - Element/structure loads (vld1)  */
8880 neon_vector_mem_operand (rtx op, int type)
8884 /* Reject eliminable registers. */
8885 if (! (reload_in_progress || reload_completed)
8886 && ( reg_mentioned_p (frame_pointer_rtx, op)
8887 || reg_mentioned_p (arg_pointer_rtx, op)
8888 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8889 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8890 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8891 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8894 /* Constants are converted into offsets from labels. */
8895 if (GET_CODE (op) != MEM)
8900 if (reload_completed
8901 && (GET_CODE (ind) == LABEL_REF
8902 || (GET_CODE (ind) == CONST
8903 && GET_CODE (XEXP (ind, 0)) == PLUS
8904 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8905 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8908 /* Match: (mem (reg)). */
8909 if (GET_CODE (ind) == REG)
8910 return arm_address_register_rtx_p (ind, 0);
8912 /* Allow post-increment with Neon registers. */
8913 if ((type != 1 && GET_CODE (ind) == POST_INC)
8914 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8915 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8917 /* FIXME: vld1 allows register post-modify. */
8923 && GET_CODE (ind) == PLUS
8924 && GET_CODE (XEXP (ind, 0)) == REG
8925 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8926 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8927 && INTVAL (XEXP (ind, 1)) > -1024
8928 && INTVAL (XEXP (ind, 1)) < 1016
8929 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
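/* Note the slightly narrower range here than in arm_coproc_mem_operand:
   word-aligned offsets from -1020 up to only +1012, so e.g.
   (mem (plus (reg r2) (const_int 1012))) is accepted but an offset of
   1016 is not.  */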
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   mode.  */
8938 neon_struct_mem_operand (rtx op)
8942 /* Reject eliminable registers. */
8943 if (! (reload_in_progress || reload_completed)
8944 && ( reg_mentioned_p (frame_pointer_rtx, op)
8945 || reg_mentioned_p (arg_pointer_rtx, op)
8946 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8947 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8948 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8949 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8952 /* Constants are converted into offsets from labels. */
8953 if (GET_CODE (op) != MEM)
8958 if (reload_completed
8959 && (GET_CODE (ind) == LABEL_REF
8960 || (GET_CODE (ind) == CONST
8961 && GET_CODE (XEXP (ind, 0)) == PLUS
8962 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8963 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8966 /* Match: (mem (reg)). */
8967 if (GET_CODE (ind) == REG)
8968 return arm_address_register_rtx_p (ind, 0);
8973 /* Return true if X is a register that will be eliminated later on. */
8975 arm_eliminable_register (rtx x)
8977 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8978 || REGNO (x) == ARG_POINTER_REGNUM
8979 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8980 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register is required to reload x
   to/from coprocessor registers.  Otherwise return NO_REGS.  */
8987 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8991 if (!TARGET_NEON_FP16)
8992 return GENERAL_REGS;
8993 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8995 return GENERAL_REGS;
  /* The neon move patterns handle all legitimate vector and struct
     modes.  */
  if (TARGET_NEON
9002 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9003 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9004 || VALID_NEON_STRUCT_MODE (mode)))
9007 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9010 return GENERAL_REGS;
9013 /* Values which must be returned in the most-significant end of the return
9017 arm_return_in_msb (const_tree valtype)
9019 return (TARGET_AAPCS_BASED
9021 && (AGGREGATE_TYPE_P (valtype)
9022 || TREE_CODE (valtype) == COMPLEX_TYPE));
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code, which has to work around
   a hardware bug triggered by such instructions.  */
9029 arm_memory_load_p (rtx insn)
  rtx body, lhs, rhs;
9033 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9036 body = PATTERN (insn);
9038 if (GET_CODE (body) != SET)
9041 lhs = XEXP (body, 0);
9042 rhs = XEXP (body, 1);
9044 lhs = REG_OR_SUBREG_RTX (lhs);
9046 /* If the destination is not a general purpose
9047 register we do not have to worry. */
9048 if (GET_CODE (lhs) != REG
9049 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9052 /* As well as loads from memory we also have to react
9053 to loads of invalid constants which will be turned
9054 into loads from the minipool. */
9055 return (GET_CODE (rhs) == MEM
9056 || GET_CODE (rhs) == SYMBOL_REF
9057 || note_invalid_constants (insn, -1, false));
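/* For example, (set (reg:SI r5) (mem:SI (reg:SI r0))) -- i.e.
   "ldr r5, [r0]" -- satisfies this predicate, as does a SYMBOL_REF
   load that will later be rewritten as a load from the minipool.  */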
9060 /* Return TRUE if INSN is a Cirrus instruction. */
9062 arm_cirrus_insn_p (rtx insn)
9064 enum attr_cirrus attr;
9066 /* get_attr cannot accept USE or CLOBBER. */
9068 || GET_CODE (insn) != INSN
9069 || GET_CODE (PATTERN (insn)) == USE
9070 || GET_CODE (PATTERN (insn)) == CLOBBER)
9073 attr = get_attr_cirrus (insn);
9075 return attr != CIRRUS_NOT;
9078 /* Cirrus reorg for invalid instruction combinations. */
9080 cirrus_reorg (rtx first)
9082 enum attr_cirrus attr;
9083 rtx body = PATTERN (first);
  /* Any branch must be followed by 2 non-Cirrus instructions.  */
9088 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9091 t = next_nonnote_insn (first);
9093 if (arm_cirrus_insn_p (t))
9096 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9100 emit_insn_after (gen_nop (), first);
9105 /* (float (blah)) is in parallel with a clobber. */
9106 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9107 body = XVECEXP (body, 0, 0);
9109 if (GET_CODE (body) == SET)
9111 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
      /* cfldrd, cfldr64, cfstrd, cfstr64 must
         be followed by a non-Cirrus insn.  */
9115 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9117 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9118 emit_insn_after (gen_nop (), first);
9122 else if (arm_memory_load_p (first))
9124 unsigned int arm_regno;
      /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
         ldr/cfmv64hr combination where the Rd field is the same
         in both instructions must be split with a non-Cirrus
         insn in between.  */
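      /* For example (register numbers are arbitrary), the pair

           ldr r5, [r0]
           cfmvsr mvf0, r5

         must become

           ldr r5, [r0]
           nop
           cfmvsr mvf0, r5  */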
9135 /* Get Arm register number for ldr insn. */
9136 if (GET_CODE (lhs) == REG)
9137 arm_regno = REGNO (lhs);
9140 gcc_assert (GET_CODE (rhs) == REG);
9141 arm_regno = REGNO (rhs);
9145 first = next_nonnote_insn (first);
9147 if (! arm_cirrus_insn_p (first))
9150 body = PATTERN (first);
9152 /* (float (blah)) is in parallel with a clobber. */
9153 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9154 body = XVECEXP (body, 0, 0);
9156 if (GET_CODE (body) == FLOAT)
9157 body = XEXP (body, 0);
9159 if (get_attr_cirrus (first) == CIRRUS_MOVE
9160 && GET_CODE (XEXP (body, 1)) == REG
9161 && arm_regno == REGNO (XEXP (body, 1)))
9162 emit_insn_after (gen_nop (), first);
9168 /* get_attr cannot accept USE or CLOBBER. */
9170 || GET_CODE (first) != INSN
9171 || GET_CODE (PATTERN (first)) == USE
9172 || GET_CODE (PATTERN (first)) == CLOBBER)
9175 attr = get_attr_cirrus (first);
9177 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9178 must be followed by a non-coprocessor instruction. */
9179 if (attr == CIRRUS_COMPARE)
9183 t = next_nonnote_insn (first);
9185 if (arm_cirrus_insn_p (t))
9188 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9192 emit_insn_after (gen_nop (), first);
9198 /* Return TRUE if X references a SYMBOL_REF. */
9200 symbol_mentioned_p (rtx x)
9205 if (GET_CODE (x) == SYMBOL_REF)
9208 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9209 are constant offsets, not symbols. */
9210 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9213 fmt = GET_RTX_FORMAT (GET_CODE (x));
9215 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9221 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9222 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9225 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9232 /* Return TRUE if X references a LABEL_REF. */
9234 label_mentioned_p (rtx x)
9239 if (GET_CODE (x) == LABEL_REF)
9242 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9243 instruction, but they are constant offsets, not symbols. */
9244 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9247 fmt = GET_RTX_FORMAT (GET_CODE (x));
9248 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9254 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9255 if (label_mentioned_p (XVECEXP (x, i, j)))
9258 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9266 tls_mentioned_p (rtx x)
9268 switch (GET_CODE (x))
9271 return tls_mentioned_p (XEXP (x, 0));
9274 if (XINT (x, 1) == UNSPEC_TLS)
9282 /* Must not copy any rtx that uses a pc-relative address. */
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9287 if (GET_CODE (*x) == UNSPEC
9288 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9294 arm_cannot_copy_insn_p (rtx insn)
9296 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9302 enum rtx_code code = GET_CODE (x);
9319 /* Return 1 if memory locations are adjacent. */
9321 adjacent_mem_locations (rtx a, rtx b)
9323 /* We don't guarantee to preserve the order of these memory refs. */
9324 if (volatile_refs_p (a) || volatile_refs_p (b))
9327 if ((GET_CODE (XEXP (a, 0)) == REG
9328 || (GET_CODE (XEXP (a, 0)) == PLUS
9329 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9330 && (GET_CODE (XEXP (b, 0)) == REG
9331 || (GET_CODE (XEXP (b, 0)) == PLUS
9332 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9334 HOST_WIDE_INT val0 = 0, val1 = 0;
9338 if (GET_CODE (XEXP (a, 0)) == PLUS)
9340 reg0 = XEXP (XEXP (a, 0), 0);
9341 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9346 if (GET_CODE (XEXP (b, 0)) == PLUS)
9348 reg1 = XEXP (XEXP (b, 0), 0);
9349 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9354 /* Don't accept any offset that will require multiple
9355 instructions to handle, since this would cause the
9356 arith_adjacentmem pattern to output an overlong sequence. */
9357 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9360 /* Don't allow an eliminable register: register elimination can make
9361 the offset too large. */
9362 if (arm_eliminable_register (reg0))
9365 val_diff = val1 - val0;
9369 /* If the target has load delay slots, then there's no benefit
9370 to using an ldm instruction unless the offset is zero and
9371 we are optimizing for size. */
9372 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9373 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9374 && (val_diff == 4 || val_diff == -4));
9377 return ((REGNO (reg0) == REGNO (reg1))
9378 && (val_diff == 4 || val_diff == -4));
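/* For example, (mem (reg r3)) and (mem (plus (reg r3) (const_int 4)))
   are adjacent: same base register, offsets 0 and 4, an offset
   difference of exactly 4.  */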
9384 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9385 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9386 instruction. ADD_OFFSET is nonzero if the base address register needs
9387 to be modified with an add instruction before we can use it. */
9390 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9391 int nops, HOST_WIDE_INT add_offset)
9393 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9394 if the offset isn't small enough. The reason 2 ldrs are faster
9395 is because these ARMs are able to do more than one cache access
9396 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9397 whilst the ARM8 has a double bandwidth cache. This means that
9398 these cores can do both an instruction fetch and a data fetch in
9399 a single cycle, so the trick of calculating the address into a
9400 scratch register (one of the result regs) and then doing a load
9401 multiple actually becomes slower (and no smaller in code size).
9402 That is the transformation
9404 ldr rd1, [rbase + offset]
9405 ldr rd2, [rbase + offset + 4]
9409 add rd1, rbase, offset
9410 ldmia rd1, {rd1, rd2}
9412 produces worse code -- '3 cycles + any stalls on rd2' instead of
9413 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9414 access per cycle, the first sequence could never complete in less
9415 than 6 cycles, whereas the ldm sequence would only take 5 and
   would make better use of sequential accesses if not hitting the cache.
9419 We cheat here and test 'arm_ld_sched' which we currently know to
9420 only be true for the ARM8, ARM9 and StrongARM. If this ever
9421 changes, then the test below needs to be reworked. */
9422 if (nops == 2 && arm_ld_sched && add_offset != 0)
9425 /* XScale has load-store double instructions, but they have stricter
9426 alignment requirements than load-store multiple, so we cannot
9429 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9430 the pipeline until completion.
   An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9447 Best case ldr will always win. However, the more ldr instructions
9448 we issue, the less likely we are to be able to schedule them well.
9449 Using ldr instructions also increases code size.
9451 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9452 for counts of 3 or 4 regs. */
9453 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9458 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9459 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9460 an array ORDER which describes the sequence to use when accessing the
9461 offsets that produces an ascending order. In this sequence, each
9462 offset must be larger by exactly 4 than the previous one. ORDER[0]
9463 must have been filled in with the lowest offset by the caller.
9464 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9465 we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if not.  */
9470 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9474 for (i = 1; i < nops; i++)
9478 order[i] = order[i - 1];
9479 for (j = 0; j < nops; j++)
9480 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9482 /* We must find exactly one offset that is higher than the
9483 previous one by 4. */
9484 if (order[i] != order[i - 1])
9488 if (order[i] == order[i - 1])
9490 /* The register numbers must be ascending. */
9491 if (unsorted_regs != NULL
9492 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
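/* For example, given UNSORTED_OFFSETS = {4, 12, 0, 8}, the caller seeds
   ORDER[0] = 2 (the slot holding offset 0) and the loop above fills in
   ORDER = {2, 0, 3, 1}, i.e. offsets 0, 4, 8, 12 in ascending order.  */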
9498 /* Used to determine in a peephole whether a sequence of load
9499 instructions can be changed into a load-multiple instruction.
9500 NOPS is the number of separate load instructions we are examining. The
9501 first NOPS entries in OPERANDS are the destination registers, the
9502 next NOPS entries are memory operands. If this function is
9503 successful, *BASE is set to the common base register of the memory
9504 accesses; *LOAD_OFFSET is set to the first memory location's offset
9505 from that base register.
9506 REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9509 the sequence of registers in REGS matches the loads from ascending memory
9510 locations, and the function verifies that the register numbers are
9511 themselves ascending. If CHECK_REGS is false, the register numbers
9512 are stored in the order they are found in the operands. */
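/* For example (with arbitrary register numbers), the sequence

     ldr r0, [r4]
     ldr r1, [r4, #4]

   passes this check with *BASE == 4 (i.e. r4), *LOAD_OFFSET == 0 and
   REGS == {0, 1}, allowing the peephole to emit "ldmia r4, {r0, r1}".  */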
9514 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9515 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9517 int unsorted_regs[MAX_LDM_STM_OPS];
9518 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9519 int order[MAX_LDM_STM_OPS];
9520 rtx base_reg_rtx = NULL;
9524 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9525 easily extended if required. */
9526 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9528 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9530 /* Loop over the operands and check that the memory references are
9531 suitable (i.e. immediate offsets from the same base register). At
     the same time, extract the target register, and the memory offsets.  */
9534 for (i = 0; i < nops; i++)
9539 /* Convert a subreg of a mem into the mem itself. */
9540 if (GET_CODE (operands[nops + i]) == SUBREG)
9541 operands[nops + i] = alter_subreg (operands + (nops + i));
9543 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9545 /* Don't reorder volatile memory references; it doesn't seem worth
9546 looking for the case where the order is ok anyway. */
9547 if (MEM_VOLATILE_P (operands[nops + i]))
9550 offset = const0_rtx;
9552 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9553 || (GET_CODE (reg) == SUBREG
9554 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9555 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9556 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9558 || (GET_CODE (reg) == SUBREG
9559 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9560 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9565 base_reg = REGNO (reg);
9567 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9570 else if (base_reg != (int) REGNO (reg))
9571 /* Not addressed from the same base register. */
9574 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9575 ? REGNO (operands[i])
9576 : REGNO (SUBREG_REG (operands[i])));
9578 /* If it isn't an integer register, or if it overwrites the
9579 base register but isn't the last insn in the list, then
9580 we can't do this. */
9581 if (unsorted_regs[i] < 0
9582 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9583 || unsorted_regs[i] > 14
9584 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9587 unsorted_offsets[i] = INTVAL (offset);
9588 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9592 /* Not a suitable memory address. */
9596 /* All the useful information has now been extracted from the
9597 operands into unsorted_regs and unsorted_offsets; additionally,
9598 order[0] has been set to the lowest offset in the list. Sort
9599 the offsets into order, verifying that they are adjacent, and
9600 check that the register numbers are ascending. */
9601 if (!compute_offset_order (nops, unsorted_offsets, order,
9602 check_regs ? unsorted_regs : NULL))
9606 memcpy (saved_order, order, sizeof order);
9612 for (i = 0; i < nops; i++)
9613 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9615 *load_offset = unsorted_offsets[order[0]];
9619 && !peep2_reg_dead_p (nops, base_reg_rtx))
9622 if (unsorted_offsets[order[0]] == 0)
9623 ldm_case = 1; /* ldmia */
9624 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9625 ldm_case = 2; /* ldmib */
9626 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9627 ldm_case = 3; /* ldmda */
9628 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9629 ldm_case = 4; /* ldmdb */
9630 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9631 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9636 if (!multiple_operation_profitable_p (false, nops,
9638 ? unsorted_offsets[order[0]] : 0))
9644 /* Used to determine in a peephole whether a sequence of store instructions can
9645 be changed into a store-multiple instruction.
9646 NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9649 The first NOPS entries in OPERANDS are the source registers, the next
9650 NOPS entries are memory operands. If this function is successful, *BASE is
9651 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9652 to the first memory location's offset from that base register. REGS is an
9653 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9654 likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
9657 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9658 from ascending memory locations, and the function verifies that the register
9659 numbers are themselves ascending. If CHECK_REGS is false, the register
9660 numbers are stored in the order they are found in the operands. */
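/* The store analogue of load_multiple_sequence: e.g. "str r0, [r4]"
   followed by "str r1, [r4, #4]" can be rewritten as
   "stmia r4, {r0, r1}" when this check succeeds.  */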
9662 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9663 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9664 HOST_WIDE_INT *load_offset, bool check_regs)
9666 int unsorted_regs[MAX_LDM_STM_OPS];
9667 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9668 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9669 int order[MAX_LDM_STM_OPS];
9671 rtx base_reg_rtx = NULL;
9674 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9675 easily extended if required. */
9676 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9678 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9680 /* Loop over the operands and check that the memory references are
9681 suitable (i.e. immediate offsets from the same base register). At
     the same time, extract the target register, and the memory offsets.  */
9684 for (i = 0; i < nops; i++)
9689 /* Convert a subreg of a mem into the mem itself. */
9690 if (GET_CODE (operands[nops + i]) == SUBREG)
9691 operands[nops + i] = alter_subreg (operands + (nops + i));
9693 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9695 /* Don't reorder volatile memory references; it doesn't seem worth
9696 looking for the case where the order is ok anyway. */
9697 if (MEM_VOLATILE_P (operands[nops + i]))
9700 offset = const0_rtx;
9702 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9703 || (GET_CODE (reg) == SUBREG
9704 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9705 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9706 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9708 || (GET_CODE (reg) == SUBREG
9709 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9710 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9713 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9714 ? operands[i] : SUBREG_REG (operands[i]));
9715 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9719 base_reg = REGNO (reg);
9721 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9724 else if (base_reg != (int) REGNO (reg))
9725 /* Not addressed from the same base register. */
9728 /* If it isn't an integer register, then we can't do this. */
9729 if (unsorted_regs[i] < 0
9730 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9731 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9732 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9733 || unsorted_regs[i] > 14)
9736 unsorted_offsets[i] = INTVAL (offset);
9737 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9741 /* Not a suitable memory address. */
9745 /* All the useful information has now been extracted from the
9746 operands into unsorted_regs and unsorted_offsets; additionally,
9747 order[0] has been set to the lowest offset in the list. Sort
9748 the offsets into order, verifying that they are adjacent, and
9749 check that the register numbers are ascending. */
9750 if (!compute_offset_order (nops, unsorted_offsets, order,
9751 check_regs ? unsorted_regs : NULL))
9755 memcpy (saved_order, order, sizeof order);
9761 for (i = 0; i < nops; i++)
9763 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9765 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9768 *load_offset = unsorted_offsets[order[0]];
9772 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9775 if (unsorted_offsets[order[0]] == 0)
9776 stm_case = 1; /* stmia */
9777 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9778 stm_case = 2; /* stmib */
9779 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9780 stm_case = 3; /* stmda */
9781 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9782 stm_case = 4; /* stmdb */
9786 if (!multiple_operation_profitable_p (false, nops, 0))
9792 /* Routines for use in generating RTL. */
9794 /* Generate a load-multiple instruction. COUNT is the number of loads in
9795 the instruction; REGS and MEMS are arrays containing the operands.
9796 BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9801 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9802 HOST_WIDE_INT wback_offset)
9807 if (!multiple_operation_profitable_p (false, count, 0))
9813 for (i = 0; i < count; i++)
9814 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9816 if (wback_offset != 0)
9817 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9825 result = gen_rtx_PARALLEL (VOIDmode,
9826 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9827 if (wback_offset != 0)
9829 XVECEXP (result, 0, 0)
9830 = gen_rtx_SET (VOIDmode, basereg,
9831 plus_constant (basereg, wback_offset));
9836 for (j = 0; i < count; i++, j++)
9837 XVECEXP (result, 0, i)
9838 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
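/* For illustration, with COUNT == 2 and a writeback offset of 8 the
   PARALLEL built above has roughly the shape

     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
                (set (reg r0) (mem (reg rb)))
                (set (reg r1) (mem (plus (reg rb) (const_int 4))))])

   which matches the load-multiple patterns in the machine
   description.  */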
9843 /* Generate a store-multiple instruction. COUNT is the number of stores in
9844 the instruction; REGS and MEMS are arrays containing the operands.
9845 BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9850 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9851 HOST_WIDE_INT wback_offset)
9856 if (GET_CODE (basereg) == PLUS)
9857 basereg = XEXP (basereg, 0);
9859 if (!multiple_operation_profitable_p (false, count, 0))
9865 for (i = 0; i < count; i++)
9866 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9868 if (wback_offset != 0)
9869 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9877 result = gen_rtx_PARALLEL (VOIDmode,
9878 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9879 if (wback_offset != 0)
9881 XVECEXP (result, 0, 0)
9882 = gen_rtx_SET (VOIDmode, basereg,
9883 plus_constant (basereg, wback_offset));
9888 for (j = 0; i < count; i++, j++)
9889 XVECEXP (result, 0, i)
9890 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9895 /* Generate either a load-multiple or a store-multiple instruction. This
9896 function can be used in situations where we can start with a single MEM
9897 rtx and adjust its address upwards.
9898 COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register numbers to be used in the instruction.
9901 BASEREG is the base register to be used in addressing the memory operands,
9902 which are constructed from BASEMEM.
9903 WRITE_BACK specifies whether the generated instruction should include an
9904 update of the base register.
9905 OFFSETP is used to pass an offset to and from this function; this offset
9906 is not used when constructing the address (instead BASEMEM should have an
9907 appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
9911 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9912 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9914 rtx mems[MAX_LDM_STM_OPS];
9915 HOST_WIDE_INT offset = *offsetp;
9918 gcc_assert (count <= MAX_LDM_STM_OPS);
9920 if (GET_CODE (basereg) == PLUS)
9921 basereg = XEXP (basereg, 0);
9923 for (i = 0; i < count; i++)
9925 rtx addr = plus_constant (basereg, i * 4);
9926 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9934 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9935 write_back ? 4 * count : 0);
9937 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9938 write_back ? 4 * count : 0);
9942 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9943 rtx basemem, HOST_WIDE_INT *offsetp)
9945 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9950 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9951 rtx basemem, HOST_WIDE_INT *offsetp)
9953 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9957 /* Called from a peephole2 expander to turn a sequence of loads into an
9958 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9959 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
9962 Returns true iff we could generate a new instruction. */
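/* For example, with SORT_REGS true the sequence "ldr r1, [r4]" followed
   by "ldr r0, [r4, #4]" (where r0 and r1 feed a commutative operation)
   has its register list reordered to {r0, r1}, so that an ldmia, which
   requires an ascending register list, can still be used.  */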
9965 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9967 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9968 rtx mems[MAX_LDM_STM_OPS];
9971 HOST_WIDE_INT offset;
9972 int write_back = FALSE;
9976 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9977 &base_reg, &offset, !sort_regs);
9983 for (i = 0; i < nops - 1; i++)
9984 for (j = i + 1; j < nops; j++)
9985 if (regs[i] > regs[j])
9991 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9995 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9996 gcc_assert (ldm_case == 1 || ldm_case == 5);
10002 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10003 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10005 if (!TARGET_THUMB1)
10007 base_reg = regs[0];
10008 base_reg_rtx = newbase;
10012 for (i = 0; i < nops; i++)
10014 addr = plus_constant (base_reg_rtx, offset + i * 4);
10015 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10018 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10019 write_back ? offset + i * 4 : 0));
10023 /* Called from a peephole2 expander to turn a sequence of stores into an
10024 STM instruction. OPERANDS are the operands found by the peephole matcher;
10025 NOPS indicates how many separate stores we are trying to combine.
10026 Returns true iff we could generate a new instruction. */
10029 gen_stm_seq (rtx *operands, int nops)
10032 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10033 rtx mems[MAX_LDM_STM_OPS];
10036 HOST_WIDE_INT offset;
10037 int write_back = FALSE;
10040 bool base_reg_dies;
10042 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10043 mem_order, &base_reg, &offset, true);
10048 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10050 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10053 gcc_assert (base_reg_dies);
10059 gcc_assert (base_reg_dies);
10060 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10064 addr = plus_constant (base_reg_rtx, offset);
10066 for (i = 0; i < nops; i++)
10068 addr = plus_constant (base_reg_rtx, offset + i * 4);
10069 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10072 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10073 write_back ? offset + i * 4 : 0));
10077 /* Called from a peephole2 expander to turn a sequence of stores that are
10078 preceded by constant loads into an STM instruction. OPERANDS are the
10079 operands found by the peephole matcher; NOPS indicates how many
10080 separate stores we are trying to combine; there are 2 * NOPS
10081 instructions in the peephole.
10082 Returns true iff we could generate a new instruction. */
10085 gen_const_stm_seq (rtx *operands, int nops)
10087 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10088 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10089 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10090 rtx mems[MAX_LDM_STM_OPS];
10093 HOST_WIDE_INT offset;
10094 int write_back = FALSE;
10097 bool base_reg_dies;
10099 HARD_REG_SET allocated;
10101 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10102 mem_order, &base_reg, &offset, false);
10107 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
  /* If the same register is used more than once, try to find a free
     register.  */
10111 CLEAR_HARD_REG_SET (allocated);
10112 for (i = 0; i < nops; i++)
10114 for (j = i + 1; j < nops; j++)
10115 if (regs[i] == regs[j])
10117 rtx t = peep2_find_free_register (0, nops * 2,
10118 TARGET_THUMB1 ? "l" : "r",
10119 SImode, &allocated);
10123 regs[i] = REGNO (t);
  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
10130 for (i = 0; i < nops; i++)
10131 if (regs[i] < regs[reg_order[0]])
10134 for (i = 1; i < nops; i++)
10136 int this_order = reg_order[i - 1];
10137 for (j = 0; j < nops; j++)
10138 if (regs[j] > regs[reg_order[i - 1]]
10139 && (this_order == reg_order[i - 1]
10140 || regs[j] < regs[this_order]))
10142 reg_order[i] = this_order;
10145 /* Ensure that registers that must be live after the instruction end
10146 up with the correct value. */
10147 for (i = 0; i < nops; i++)
10149 int this_order = reg_order[i];
10150 if ((this_order != mem_order[i]
10151 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10152 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10156 /* Load the constants. */
10157 for (i = 0; i < nops; i++)
10159 rtx op = operands[2 * nops + mem_order[i]];
10160 sorted_regs[i] = regs[reg_order[i]];
10161 emit_move_insn (reg_rtxs[reg_order[i]], op);
10164 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10166 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10169 gcc_assert (base_reg_dies);
10175 gcc_assert (base_reg_dies);
10176 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10180 addr = plus_constant (base_reg_rtx, offset);
10182 for (i = 0; i < nops; i++)
10184 addr = plus_constant (base_reg_rtx, offset + i * 4);
10185 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10188 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10189 write_back ? offset + i * 4 : 0));
10194 arm_gen_movmemqi (rtx *operands)
10196 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10197 HOST_WIDE_INT srcoffset, dstoffset;
10199 rtx src, dst, srcbase, dstbase;
10200 rtx part_bytes_reg = NULL;
10203 if (GET_CODE (operands[2]) != CONST_INT
10204 || GET_CODE (operands[3]) != CONST_INT
10205 || INTVAL (operands[2]) > 64
10206 || INTVAL (operands[3]) & 3)
10209 dstbase = operands[0];
10210 srcbase = operands[1];
10212 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10213 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10215 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10216 out_words_to_go = INTVAL (operands[2]) / 4;
10217 last_bytes = INTVAL (operands[2]) & 3;
10218 dstoffset = srcoffset = 0;
10220 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10221 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10223 for (i = 0; in_words_to_go >= 2; i+=4)
10225 if (in_words_to_go > 4)
10226 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10227 TRUE, srcbase, &srcoffset));
10229 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10230 src, FALSE, srcbase,
10233 if (out_words_to_go)
10235 if (out_words_to_go > 4)
10236 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10237 TRUE, dstbase, &dstoffset));
10238 else if (out_words_to_go != 1)
10239 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10240 out_words_to_go, dst,
10243 dstbase, &dstoffset));
10246 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10247 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10248 if (last_bytes != 0)
10250 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10256 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10257 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10260 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10261 if (out_words_to_go)
10265 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10266 sreg = copy_to_reg (mem);
10268 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10269 emit_move_insn (mem, sreg);
10272 gcc_assert (!in_words_to_go); /* Sanity check */
10275 if (in_words_to_go)
10277 gcc_assert (in_words_to_go > 0);
10279 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10280 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10283 gcc_assert (!last_bytes || part_bytes_reg);
10285 if (BYTES_BIG_ENDIAN && last_bytes)
10287 rtx tmp = gen_reg_rtx (SImode);
10289 /* The bytes we want are in the top end of the word. */
10290 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10291 GEN_INT (8 * (4 - last_bytes))));
10292 part_bytes_reg = tmp;
10296 mem = adjust_automodify_address (dstbase, QImode,
10297 plus_constant (dst, last_bytes - 1),
10298 dstoffset + last_bytes - 1);
10299 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10303 tmp = gen_reg_rtx (SImode);
10304 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10305 part_bytes_reg = tmp;
10312 if (last_bytes > 1)
10314 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10315 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10319 rtx tmp = gen_reg_rtx (SImode);
10320 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10321 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10322 part_bytes_reg = tmp;
10329 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10330 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
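/* For example, a 14-byte copy (INTVAL (operands[2]) == 14) expands to
   one 4-word ldmia (the last word supplying the trailing bytes), a
   3-word stmia, and a final HImode store for the remaining 2 bytes.  */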
10337 /* Select a dominance comparison mode if possible for a test of the general
10338 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10339 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10340 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10341 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10342 In all cases OP will be either EQ or NE, but we don't need to know which
10343 here. If we are unable to support a dominance comparison we return
10344 CC mode. This will then fail to match for the RTL expressions that
10345 generate this call. */
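/* For example, (ne (and (ltu x y) (ltu a b)) (const_int 0)) arrives
   here with COND_OR == DOM_CC_X_AND_Y and cond1 == cond2 == LTU, and
   yields CC_DLTUmode; two unrelated comparisons such as LT and EQ,
   where neither dominates the other, yield CCmode and the pattern
   fails to match.  */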
10347 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10349 enum rtx_code cond1, cond2;
10352 /* Currently we will probably get the wrong result if the individual
10353 comparisons are not simple. This also ensures that it is safe to
10354 reverse a comparison if necessary. */
10355 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10357 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10361 /* The if_then_else variant of this tests the second condition if the
10362 first passes, but is true if the first fails. Reverse the first
10363 condition to get a true "inclusive-or" expression. */
10364 if (cond_or == DOM_CC_NX_OR_Y)
10365 cond1 = reverse_condition (cond1);
10367 /* If the comparisons are not equal, and one doesn't dominate the other,
10368 then we can't do this. */
10370 && !comparison_dominates_p (cond1, cond2)
10371 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10376 enum rtx_code temp = cond1;
10384 if (cond_or == DOM_CC_X_AND_Y)
10389 case EQ: return CC_DEQmode;
10390 case LE: return CC_DLEmode;
10391 case LEU: return CC_DLEUmode;
10392 case GE: return CC_DGEmode;
10393 case GEU: return CC_DGEUmode;
10394 default: gcc_unreachable ();
10398 if (cond_or == DOM_CC_X_AND_Y)
10410 gcc_unreachable ();
10414 if (cond_or == DOM_CC_X_AND_Y)
10426 gcc_unreachable ();
10430 if (cond_or == DOM_CC_X_AND_Y)
10431 return CC_DLTUmode;
10436 return CC_DLTUmode;
10438 return CC_DLEUmode;
10442 gcc_unreachable ();
10446 if (cond_or == DOM_CC_X_AND_Y)
10447 return CC_DGTUmode;
10452 return CC_DGTUmode;
10454 return CC_DGEUmode;
10458 gcc_unreachable ();
      /* The remaining cases only occur when both comparisons are the
         same.  */
10464 gcc_assert (cond1 == cond2);
10468 gcc_assert (cond1 == cond2);
10472 gcc_assert (cond1 == cond2);
10476 gcc_assert (cond1 == cond2);
10477 return CC_DLEUmode;
10480 gcc_assert (cond1 == cond2);
10481 return CC_DGEUmode;
10484 gcc_unreachable ();
10489 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10491 /* All floating point compares return CCFP if it is an equality
10492 comparison, and CCFPE otherwise. */
10493 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10513 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10518 gcc_unreachable ();
10522 /* A compare with a shifted operand. Because of canonicalization, the
10523 comparison will have to be swapped when we emit the assembler. */
10524 if (GET_MODE (y) == SImode
10525 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10526 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10527 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10528 || GET_CODE (x) == ROTATERT))
10531 /* This operation is performed swapped, but since we only rely on the Z
10532 flag we don't need an additional mode. */
10533 if (GET_MODE (y) == SImode
10534 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10535 && GET_CODE (x) == NEG
10536 && (op == EQ || op == NE))
10539 /* This is a special case that is used by combine to allow a
10540 comparison of a shifted byte load to be split into a zero-extend
10541 followed by a comparison of the shifted integer (only valid for
10542 equalities and unsigned inequalities). */
10543 if (GET_MODE (x) == SImode
10544 && GET_CODE (x) == ASHIFT
10545 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10546 && GET_CODE (XEXP (x, 0)) == SUBREG
10547 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10548 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10549 && (op == EQ || op == NE
10550 || op == GEU || op == GTU || op == LTU || op == LEU)
10551 && GET_CODE (y) == CONST_INT)
10554 /* A construct for a conditional compare, if the false arm contains
10555 0, then both conditions must be true, otherwise either condition
10556 must be true. Not all conditions are possible, so CCmode is
10557 returned if it can't be done. */
10558 if (GET_CODE (x) == IF_THEN_ELSE
10559 && (XEXP (x, 2) == const0_rtx
10560 || XEXP (x, 2) == const1_rtx)
10561 && COMPARISON_P (XEXP (x, 0))
10562 && COMPARISON_P (XEXP (x, 1)))
10563 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10564 INTVAL (XEXP (x, 2)));
10566 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10567 if (GET_CODE (x) == AND
10568 && (op == EQ || op == NE)
10569 && COMPARISON_P (XEXP (x, 0))
10570 && COMPARISON_P (XEXP (x, 1)))
10571 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10574 if (GET_CODE (x) == IOR
10575 && (op == EQ || op == NE)
10576 && COMPARISON_P (XEXP (x, 0))
10577 && COMPARISON_P (XEXP (x, 1)))
10578 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10581 /* An operation (on Thumb) where we want to test for a single bit.
10582 This is done by shifting that bit up into the top bit of a
10583 scratch register; we can then branch on the sign bit. */
10585 && GET_MODE (x) == SImode
10586 && (op == EQ || op == NE)
10587 && GET_CODE (x) == ZERO_EXTRACT
10588 && XEXP (x, 1) == const1_rtx)
10591 /* An operation that sets the condition codes as a side-effect, the
10592 V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
10595 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10596 if (GET_MODE (x) == SImode
10598 && (op == EQ || op == NE || op == LT || op == GE)
10599 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10600 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10601 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10602 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10603 || GET_CODE (x) == LSHIFTRT
10604 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10605 || GET_CODE (x) == ROTATERT
10606 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10607 return CC_NOOVmode;
10609 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10612 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10613 && GET_CODE (x) == PLUS
10614 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10617 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
         available.  */
10621 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10628 /* A DImode comparison against zero can be implemented by
10629 or'ing the two halves together. */
10630 if (y == const0_rtx)
10633 /* We can do an equality test in three Thumb instructions. */
10643 /* DImode unsigned comparisons can be implemented by cmp +
         cmpeq without a scratch register.  Not worth doing in
         Thumb-2.  */
10655 /* DImode signed and unsigned comparisons can be implemented
10656 by cmp + sbcs with a scratch register, but that does not
10657 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10658 gcc_assert (op != EQ && op != NE);
10662 gcc_unreachable ();
10669 /* X and Y are two things to compare using CODE. Emit the compare insn and
10670 return the rtx for register 0 in the proper mode. FP means this is a
10671 floating point compare: I don't think that it is needed on the arm. */
10673 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10675 enum machine_mode mode;
10677 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10679 /* We might have X as a constant, Y as a register because of the predicates
10680 used for cmpdi. If so, force X to a register here. */
10681 if (dimode_comparison && !REG_P (x))
10682 x = force_reg (DImode, x);
10684 mode = SELECT_CC_MODE (code, x, y);
10685 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10687 if (dimode_comparison
10688 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10689 && mode != CC_CZmode)
10693 /* To compare two non-zero values for equality, XOR them and
10694 then compare against zero. Not used for ARM mode; there
10695 CC_CZmode is cheaper. */
10696 if (mode == CC_Zmode && y != const0_rtx)
10698 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10701 /* A scratch register is required. */
10702 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10703 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10704 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10707 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10712 /* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
10716 arm_gen_return_addr_mask (void)
10718 rtx reg = gen_reg_rtx (Pmode);
10720 emit_insn (gen_return_addr_mask (reg));
10725 arm_reload_in_hi (rtx *operands)
10727 rtx ref = operands[1];
10729 HOST_WIDE_INT offset = 0;
10731 if (GET_CODE (ref) == SUBREG)
10733 offset = SUBREG_BYTE (ref);
10734 ref = SUBREG_REG (ref);
10737 if (GET_CODE (ref) == REG)
10739 /* We have a pseudo which has been spilt onto the stack; there
10740 are two cases here: the first where there is a simple
10741 stack-slot replacement and a second where the stack-slot is
10742 out of range, or is used as a subreg. */
10743 if (reg_equiv_mem (REGNO (ref)))
10745 ref = reg_equiv_mem (REGNO (ref));
10746 base = find_replacement (&XEXP (ref, 0));
10749 /* The slot is out of range, or was dressed up in a SUBREG. */
10750 base = reg_equiv_address (REGNO (ref));
10753 base = find_replacement (&XEXP (ref, 0));
10755 /* Handle the case where the address is too complex to be offset by 1. */
10756 if (GET_CODE (base) == MINUS
10757 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10759 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10761 emit_set_insn (base_plus, base);
10764 else if (GET_CODE (base) == PLUS)
10766 /* The addend must be CONST_INT, or we would have dealt with it above. */
10767 HOST_WIDE_INT hi, lo;
10769 offset += INTVAL (XEXP (base, 1));
10770 base = XEXP (base, 0);
10772 /* Rework the address into a legal sequence of insns. */
10773 /* Valid range for lo is -4095 -> 4095 */
10776 : -((-offset) & 0xfff));
    /* Corner case: if lo is the max offset then we would be out of range
10779 once we have added the additional 1 below, so bump the msb into the
10780 pre-loading insn(s). */
10784 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10785 ^ (HOST_WIDE_INT) 0x80000000)
10786 - (HOST_WIDE_INT) 0x80000000);
10788 gcc_assert (hi + lo == offset);
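    /* For example, an offset of 0x1234 splits as lo == 0x234 and
       hi == 0x1000; the hi part is added into the base register below
       and the lo part remains as the immediate offset of the byte
       loads.  */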
10792 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10794 /* Get the base address; addsi3 knows how to handle constants
10795 that require more than one insn. */
10796 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10802 /* Operands[2] may overlap operands[0] (though it won't overlap
10803 operands[1]), that's why we asked for a DImode reg -- so we can
10804 use the bit that does not overlap. */
10805 if (REGNO (operands[2]) == REGNO (operands[0]))
10806 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10808 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10810 emit_insn (gen_zero_extendqisi2 (scratch,
10811 gen_rtx_MEM (QImode,
10812 plus_constant (base,
10814 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10815 gen_rtx_MEM (QImode,
10816 plus_constant (base,
10818 if (!BYTES_BIG_ENDIAN)
10819 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10820 gen_rtx_IOR (SImode,
10823 gen_rtx_SUBREG (SImode, operands[0], 0),
10827 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10828 gen_rtx_IOR (SImode,
10829 gen_rtx_ASHIFT (SImode, scratch,
10831 gen_rtx_SUBREG (SImode, operands[0], 0)));
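/* On a little-endian target the sequence emitted above is, roughly:

     ldrb scratch, [base]
     ldrb out, [base, #1]
     orr out, scratch, out, lsl #8  */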
10834 /* Handle storing a half-word to memory during reload by synthesizing as two
10835 byte stores. Take care not to clobber the input values until after we
10836 have moved them somewhere safe. This code assumes that if the DImode
10837 scratch in operands[2] overlaps either the input value or output address
10838 in some way, then that value must die in this insn (we absolutely need
10839 two scratch registers for some corner cases). */
10841 arm_reload_out_hi (rtx *operands)
10843 rtx ref = operands[0];
10844 rtx outval = operands[1];
10846 HOST_WIDE_INT offset = 0;
10848 if (GET_CODE (ref) == SUBREG)
10850 offset = SUBREG_BYTE (ref);
10851 ref = SUBREG_REG (ref);
10854 if (GET_CODE (ref) == REG)
10856 /* We have a pseudo which has been spilt onto the stack; there
10857 are two cases here: the first where there is a simple
10858 stack-slot replacement and a second where the stack-slot is
10859 out of range, or is used as a subreg. */
10860 if (reg_equiv_mem (REGNO (ref)))
10862 ref = reg_equiv_mem (REGNO (ref));
10863 base = find_replacement (&XEXP (ref, 0));
10866 /* The slot is out of range, or was dressed up in a SUBREG. */
10867 base = reg_equiv_address (REGNO (ref));
10870 base = find_replacement (&XEXP (ref, 0));
10872 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10874 /* Handle the case where the address is too complex to be offset by 1. */
10875 if (GET_CODE (base) == MINUS
10876 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10878 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10880 /* Be careful not to destroy OUTVAL. */
10881 if (reg_overlap_mentioned_p (base_plus, outval))
10883 /* Updating base_plus might destroy outval, see if we can
10884 swap the scratch and base_plus. */
10885 if (!reg_overlap_mentioned_p (scratch, outval))
10888 scratch = base_plus;
10893 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10895 /* Be conservative and copy OUTVAL into the scratch now,
10896 this should only be necessary if outval is a subreg
10897 of something larger than a word. */
10898 /* XXX Might this clobber base? I can't see how it can,
10899 since scratch is known to overlap with OUTVAL, and
10900 must be wider than a word. */
10901 emit_insn (gen_movhi (scratch_hi, outval));
10902 outval = scratch_hi;
10906 emit_set_insn (base_plus, base);
10909 else if (GET_CODE (base) == PLUS)
10911 /* The addend must be CONST_INT, or we would have dealt with it above. */
10912 HOST_WIDE_INT hi, lo;
10914 offset += INTVAL (XEXP (base, 1));
10915 base = XEXP (base, 0);
10917 /* Rework the address into a legal sequence of insns. */
10918 /* Valid range for lo is -4095 -> 4095 */
10921 : -((-offset) & 0xfff));
    /* Corner case: if lo is the max offset then we would be out of range
10924 once we have added the additional 1 below, so bump the msb into the
10925 pre-loading insn(s). */
10929 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10930 ^ (HOST_WIDE_INT) 0x80000000)
10931 - (HOST_WIDE_INT) 0x80000000);
10933 gcc_assert (hi + lo == offset);
10937 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10939 /* Be careful not to destroy OUTVAL. */
10940 if (reg_overlap_mentioned_p (base_plus, outval))
10942 /* Updating base_plus might destroy outval, see if we
10943 can swap the scratch and base_plus. */
10944 if (!reg_overlap_mentioned_p (scratch, outval))
10947 scratch = base_plus;
10952 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10954 /* Be conservative and copy outval into scratch now,
10955 this should only be necessary if outval is a
10956 subreg of something larger than a word. */
10957 /* XXX Might this clobber base? I can't see how it
10958 can, since scratch is known to overlap with
10960 emit_insn (gen_movhi (scratch_hi, outval));
10961 outval = scratch_hi;
10965 /* Get the base address; addsi3 knows how to handle constants
10966 that require more than one insn. */
10967 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10973 if (BYTES_BIG_ENDIAN)
10975 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10976 plus_constant (base, offset + 1)),
10977 gen_lowpart (QImode, outval)));
10978 emit_insn (gen_lshrsi3 (scratch,
10979 gen_rtx_SUBREG (SImode, outval, 0),
10981 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10982 gen_lowpart (QImode, scratch)));
10986 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10987 gen_lowpart (QImode, outval)));
10988 emit_insn (gen_lshrsi3 (scratch,
10989 gen_rtx_SUBREG (SImode, outval, 0),
10991 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10992 plus_constant (base, offset + 1)),
10993 gen_lowpart (QImode, scratch)));
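/* The little-endian arm above therefore emits, roughly:

     strb out, [base]                @ low byte
     mov scratch, out, lsr #8
     strb scratch, [base, #1]        @ high byte  */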
10997 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10998 (padded to the size of a word) should be passed in a register. */
11001 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11003 if (TARGET_AAPCS_BASED)
11004 return must_pass_in_stack_var_size (mode, type);
11006 return must_pass_in_stack_var_size_or_pad (mode, type);
11010 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11011 Return true if an argument passed on the stack should be padded upwards,
11012 i.e. if the least-significant byte has useful data.
11013 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11014 aggregate types are placed in the lowest memory address. */
11017 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11019 if (!TARGET_AAPCS_BASED)
11020 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11022 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11029 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11030 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11031 byte of the register has useful data, and return the opposite if the
11032 most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */
11037 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11038 tree type, int first ATTRIBUTE_UNUSED)
11040 if (TARGET_AAPCS_BASED
11041 && BYTES_BIG_ENDIAN
11042 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11043 && int_size_in_bytes (type) <= 4)
11046 /* Otherwise, use default padding. */
11047 return !BYTES_BIG_ENDIAN;
11051 /* Print a symbolic form of X to the debug file, F. */
11053 arm_print_value (FILE *f, rtx x)
11055 switch (GET_CODE (x))
11058 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11062 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11070 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11072 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11073 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11081 fprintf (f, "\"%s\"", XSTR (x, 0));
11085 fprintf (f, "`%s'", XSTR (x, 0));
11089 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11093 arm_print_value (f, XEXP (x, 0));
11097 arm_print_value (f, XEXP (x, 0));
11099 arm_print_value (f, XEXP (x, 1));
11107 fprintf (f, "????");
11112 /* Routines for manipulation of the constant pool. */
11114 /* Arm instructions cannot load a large constant directly into a
11115 register; they have to come from a pc relative load. The constant
11116 must therefore be placed in the addressable range of the pc
11117 relative load. Depending on the precise pc relative load
11118 instruction the range is somewhere between 256 bytes and 4k. This
11119 means that we often have to dump a constant inside a function, and
11120 generate code to branch around it.
11122 It is important to minimize this, since the branches will slow
11123 things down and make the code larger.
11125 Normally we can hide the table after an existing unconditional
11126 branch so that there is no interruption of the flow, but in the
11127 worst case the code looks like this:
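(An illustrative sketch of that worst case; registers, labels and
values here are arbitrary:

	ldr	rn, L1
	...
	b	L2
	align
L1:	.long	value
L2:
	...

a branch and an alignment directive have to be inserted purely to
step over the dumped constant.)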
11145 We fix this by performing a scan after scheduling, which notices
11146 which instructions need to have their operands fetched from the
11147 constant table and builds the table.
11149 The algorithm starts by building a table of all the constants that
11150 need fixing up and all the natural barriers in the function (places
11151 where a constant table can be dropped without breaking the flow).
11152 For each fixup we note how far the pc-relative replacement will be
11153 able to reach and the offset of the instruction into the function.
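(For instance: if an ldr with a 4k pc-relative range sits at offset A,
its fix records a maximum address of roughly A + 4k, and the pool
holding its constant must be emitted before that point.)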
11155 Having built the table we then group the fixes together to form
11156 tables that are as large as possible (subject to addressing
11157 constraints) and emit each table of constants after the last
11158 barrier that is within range of all the instructions in the group.
11159 If a group does not contain a barrier, then we forcibly create one
11160 by inserting a jump instruction into the flow. Once the table has
11161 been inserted, the insns are then modified to reference the
11162 relevant entry in the pool.
11164 Possible enhancements to the algorithm (not implemented) are:
11166 1) For some processors and object formats, there may be benefit in
11167 aligning the pools to the start of cache lines; this alignment
11168 would need to be taken into account when calculating addressability of a group. */
11171 /* These typedefs are located at the start of this file, so that
11172 they can be used in the prototypes there. This comment is to
11173 remind readers of that fact so that the following structures
11174 can be understood more easily.
11176 typedef struct minipool_node Mnode;
11177 typedef struct minipool_fixup Mfix; */
11179 struct minipool_node
11181 /* Doubly linked chain of entries. */
11184 /* The maximum offset into the code that this entry can be placed. While
11185 pushing fixes for forward references, all entries are sorted in order
11186 of increasing max_address. */
11187 HOST_WIDE_INT max_address;
11188 /* Similarly for an entry inserted for a backwards ref. */
11189 HOST_WIDE_INT min_address;
11190 /* The number of fixes referencing this entry. This can become zero
11191 if we "unpush" an entry. In this case we ignore the entry when we
11192 come to emit the code. */
11194 /* The offset from the start of the minipool. */
11195 HOST_WIDE_INT offset;
11196 /* The value in the table. */
11198 /* The mode of the value. */
11199 enum machine_mode mode;
11200 /* The size of the value. With iWMMXt enabled
11201 sizes > 4 also imply an alignment of 8 bytes. */
11205 struct minipool_fixup
11209 HOST_WIDE_INT address;
11211 enum machine_mode mode;
11215 HOST_WIDE_INT forwards;
11216 HOST_WIDE_INT backwards;
11219 /* Fixes less than a word need padding out to a word boundary. */
11220 #define MINIPOOL_FIX_SIZE(mode) \
11221 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
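/* For example: a QImode or HImode fixup still occupies 4 bytes in the
pool, while a DImode fixup occupies 8. */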
11223 static Mnode * minipool_vector_head;
11224 static Mnode * minipool_vector_tail;
11225 static rtx minipool_vector_label;
11226 static int minipool_pad;
11228 /* The linked list of all minipool fixes required for this function. */
11229 Mfix * minipool_fix_head;
11230 Mfix * minipool_fix_tail;
11231 /* The fix entry for the current minipool, once it has been placed. */
11232 Mfix * minipool_barrier;
11234 /* Determines if INSN is the start of a jump table. Returns the end
11235 of the TABLE or NULL_RTX. */
11237 is_jump_table (rtx insn)
11241 if (GET_CODE (insn) == JUMP_INSN
11242 && JUMP_LABEL (insn) != NULL
11243 && ((table = next_real_insn (JUMP_LABEL (insn)))
11244 == next_real_insn (insn))
11246 && GET_CODE (table) == JUMP_INSN
11247 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11248 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11254 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11255 #define JUMP_TABLES_IN_TEXT_SECTION 0
11258 static HOST_WIDE_INT
11259 get_jump_table_size (rtx insn)
11261 /* ADDR_VECs only take room if read-only data goes into the text section. */
11263 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11265 rtx body = PATTERN (insn);
11266 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11267 HOST_WIDE_INT size;
11268 HOST_WIDE_INT modesize;
11270 modesize = GET_MODE_SIZE (GET_MODE (body));
11271 size = modesize * XVECLEN (body, elt);
11275 /* Round up size of TBB table to a halfword boundary. */
11276 size = (size + 1) & ~(HOST_WIDE_INT)1;
11279 /* No padding necessary for TBH. */
11282 /* Add two bytes for alignment on Thumb. */
11287 gcc_unreachable ();
11295 /* Move a minipool fix MP from its current location to before MAX_MP.
11296 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11297 constraints may need updating. */
11299 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11300 HOST_WIDE_INT max_address)
11302 /* The code below assumes these are different. */
11303 gcc_assert (mp != max_mp);
11305 if (max_mp == NULL)
11307 if (max_address < mp->max_address)
11308 mp->max_address = max_address;
11312 if (max_address > max_mp->max_address - mp->fix_size)
11313 mp->max_address = max_mp->max_address - mp->fix_size;
11315 mp->max_address = max_address;
11317 /* Unlink MP from its current position. Since max_mp is non-null,
11318 mp->prev must be non-null. */
11319 mp->prev->next = mp->next;
11320 if (mp->next != NULL)
11321 mp->next->prev = mp->prev;
11323 minipool_vector_tail = mp->prev;
11325 /* Re-insert it before MAX_MP. */
11327 mp->prev = max_mp->prev;
11330 if (mp->prev != NULL)
11331 mp->prev->next = mp;
11333 minipool_vector_head = mp;
11336 /* Save the new entry. */
11339 /* Scan over the preceding entries and adjust their addresses as required. */
11341 while (mp->prev != NULL
11342 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11344 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11351 /* Add a constant to the minipool for a forward reference. Returns the
11352 node added or NULL if the constant will not fit in this pool. */
11354 add_minipool_forward_ref (Mfix *fix)
11356 /* If set, max_mp is the first pool_entry that has a lower
11357 constraint than the one we are trying to add. */
11358 Mnode * max_mp = NULL;
11359 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11362 /* If the minipool starts before the end of FIX->INSN then this FIX
11363 can not be placed into the current pool. Furthermore, adding the
11364 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
11366 if (minipool_vector_head &&
11367 (fix->address + get_attr_length (fix->insn)
11368 >= minipool_vector_head->max_address - fix->fix_size))
11371 /* Scan the pool to see if a constant with the same value has
11372 already been added. While we are doing this, also note the
11373 location where we must insert the constant if it doesn't already exist. */
11375 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11377 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11378 && fix->mode == mp->mode
11379 && (GET_CODE (fix->value) != CODE_LABEL
11380 || (CODE_LABEL_NUMBER (fix->value)
11381 == CODE_LABEL_NUMBER (mp->value)))
11382 && rtx_equal_p (fix->value, mp->value))
11384 /* More than one fix references this entry. */
11386 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11389 /* Note the insertion point if necessary. */
11391 && mp->max_address > max_address)
11394 /* If we are inserting an 8-byte aligned quantity and
11395 we have not already found an insertion point, then
11396 make sure that all such 8-byte aligned quantities are
11397 placed at the start of the pool. */
11398 if (ARM_DOUBLEWORD_ALIGN
11400 && fix->fix_size >= 8
11401 && mp->fix_size < 8)
11404 max_address = mp->max_address;
11408 /* The value is not currently in the minipool, so we need to create
11409 a new entry for it. If MAX_MP is NULL, the entry will be put on
11410 the end of the list since the placement is less constrained than
11411 any existing entry. Otherwise, we insert the new fix before
11412 MAX_MP and, if necessary, adjust the constraints on the other list entries. */
11415 mp->fix_size = fix->fix_size;
11416 mp->mode = fix->mode;
11417 mp->value = fix->value;
11419 /* Not yet required for a backwards ref. */
11420 mp->min_address = -65536;
11422 if (max_mp == NULL)
11424 mp->max_address = max_address;
11426 mp->prev = minipool_vector_tail;
11428 if (mp->prev == NULL)
11430 minipool_vector_head = mp;
11431 minipool_vector_label = gen_label_rtx ();
11434 mp->prev->next = mp;
11436 minipool_vector_tail = mp;
11440 if (max_address > max_mp->max_address - mp->fix_size)
11441 mp->max_address = max_mp->max_address - mp->fix_size;
11443 mp->max_address = max_address;
11446 mp->prev = max_mp->prev;
11448 if (mp->prev != NULL)
11449 mp->prev->next = mp;
11451 minipool_vector_head = mp;
11454 /* Save the new entry. */
11457 /* Scan over the preceding entries and adjust their addresses as required. */
11459 while (mp->prev != NULL
11460 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11462 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11470 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11471 HOST_WIDE_INT min_address)
11473 HOST_WIDE_INT offset;
11475 /* The code below assumes these are different. */
11476 gcc_assert (mp != min_mp);
11478 if (min_mp == NULL)
11480 if (min_address > mp->min_address)
11481 mp->min_address = min_address;
11485 /* We will adjust this below if it is too loose. */
11486 mp->min_address = min_address;
11488 /* Unlink MP from its current position. Since min_mp is non-null,
11489 mp->next must be non-null. */
11490 mp->next->prev = mp->prev;
11491 if (mp->prev != NULL)
11492 mp->prev->next = mp->next;
11494 minipool_vector_head = mp->next;
11496 /* Reinsert it after MIN_MP. */
11498 mp->next = min_mp->next;
11500 if (mp->next != NULL)
11501 mp->next->prev = mp;
11503 minipool_vector_tail = mp;
11509 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11511 mp->offset = offset;
11512 if (mp->refcount > 0)
11513 offset += mp->fix_size;
11515 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11516 mp->next->min_address = mp->min_address + mp->fix_size;
11522 /* Add a constant to the minipool for a backward reference. Returns the
11523 node added or NULL if the constant will not fit in this pool.
11525 Note that the code for insertion for a backwards reference can be
11526 somewhat confusing because the calculated offsets for each fix do
11527 not take into account the size of the pool (which is still under construction). */
11530 add_minipool_backward_ref (Mfix *fix)
11532 /* If set, min_mp is the last pool_entry that has a lower constraint
11533 than the one we are trying to add. */
11534 Mnode *min_mp = NULL;
11535 /* This can be negative, since it is only a constraint. */
11536 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11539 /* If we can't reach the current pool from this insn, or if we can't
11540 insert this entry at the end of the pool without pushing other
11541 fixes out of range, then we don't try. This ensures that we
11542 can't fail later on. */
11543 if (min_address >= minipool_barrier->address
11544 || (minipool_vector_tail->min_address + fix->fix_size
11545 >= minipool_barrier->address))
11548 /* Scan the pool to see if a constant with the same value has
11549 already been added. While we are doing this, also note the
11550 location where we must insert the constant if it doesn't already exist. */
11552 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11554 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11555 && fix->mode == mp->mode
11556 && (GET_CODE (fix->value) != CODE_LABEL
11557 || (CODE_LABEL_NUMBER (fix->value)
11558 == CODE_LABEL_NUMBER (mp->value)))
11559 && rtx_equal_p (fix->value, mp->value)
11560 /* Check that there is enough slack to move this entry to the
11561 end of the table (this is conservative). */
11562 && (mp->max_address
11563 > (minipool_barrier->address
11564 + minipool_vector_tail->offset
11565 + minipool_vector_tail->fix_size)))
11568 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11571 if (min_mp != NULL)
11572 mp->min_address += fix->fix_size;
11575 /* Note the insertion point if necessary. */
11576 if (mp->min_address < min_address)
11578 /* For now, we do not allow the insertion of nodes requiring 8-byte
11579 alignment anywhere but at the start of the pool. */
11580 if (ARM_DOUBLEWORD_ALIGN
11581 && fix->fix_size >= 8 && mp->fix_size < 8)
11586 else if (mp->max_address
11587 < minipool_barrier->address + mp->offset + fix->fix_size)
11589 /* Inserting before this entry would push the fix beyond
11590 its maximum address (which can happen if we have
11591 re-located a forwards fix); force the new fix to come after it. */
11593 if (ARM_DOUBLEWORD_ALIGN
11594 && fix->fix_size >= 8 && mp->fix_size < 8)
11599 min_address = mp->min_address + fix->fix_size;
11602 /* Do not insert a non-8-byte aligned quantity before 8-byte
11603 aligned quantities. */
11604 else if (ARM_DOUBLEWORD_ALIGN
11605 && fix->fix_size < 8
11606 && mp->fix_size >= 8)
11609 min_address = mp->min_address + fix->fix_size;
11614 /* We need to create a new entry. */
11616 mp->fix_size = fix->fix_size;
11617 mp->mode = fix->mode;
11618 mp->value = fix->value;
11620 mp->max_address = minipool_barrier->address + 65536;
11622 mp->min_address = min_address;
11624 if (min_mp == NULL)
11627 mp->next = minipool_vector_head;
11629 if (mp->next == NULL)
11631 minipool_vector_tail = mp;
11632 minipool_vector_label = gen_label_rtx ();
11635 mp->next->prev = mp;
11637 minipool_vector_head = mp;
11641 mp->next = min_mp->next;
11645 if (mp->next != NULL)
11646 mp->next->prev = mp;
11648 minipool_vector_tail = mp;
11651 /* Save the new entry. */
11659 /* Scan over the following entries and adjust their offsets. */
11660 while (mp->next != NULL)
11662 if (mp->next->min_address < mp->min_address + mp->fix_size)
11663 mp->next->min_address = mp->min_address + mp->fix_size;
11666 mp->next->offset = mp->offset + mp->fix_size;
11668 mp->next->offset = mp->offset;
11677 assign_minipool_offsets (Mfix *barrier)
11679 HOST_WIDE_INT offset = 0;
11682 minipool_barrier = barrier;
11684 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11686 mp->offset = offset;
11688 if (mp->refcount > 0)
11689 offset += mp->fix_size;
11693 /* Output the literal table. */
11695 dump_minipool (rtx scan)
11701 if (ARM_DOUBLEWORD_ALIGN)
11702 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11703 if (mp->refcount > 0 && mp->fix_size >= 8)
11710 fprintf (dump_file,
11711 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11712 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
11714 scan = emit_label_after (gen_label_rtx (), scan);
11715 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11716 scan = emit_label_after (minipool_vector_label, scan);
11718 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11720 if (mp->refcount > 0)
11724 fprintf (dump_file,
11725 ";; Offset %u, min %ld, max %ld ",
11726 (unsigned) mp->offset, (long) mp->min_address,
11727 (long) mp->max_address);
11728 arm_print_value (dump_file, mp->value);
11729 fputc ('\n', dump_file);
11732 switch (mp->fix_size)
11734 #ifdef HAVE_consttable_1
11736 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11740 #ifdef HAVE_consttable_2
11742 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11746 #ifdef HAVE_consttable_4
11748 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11752 #ifdef HAVE_consttable_8
11754 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11758 #ifdef HAVE_consttable_16
11760 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11765 gcc_unreachable ();
11773 minipool_vector_head = minipool_vector_tail = NULL;
11774 scan = emit_insn_after (gen_consttable_end (), scan);
11775 scan = emit_barrier_after (scan);
11778 /* Return the cost of forcibly inserting a barrier after INSN. */
11780 arm_barrier_cost (rtx insn)
11782 /* Basing the location of the pool on the loop depth is preferable,
11783 but at the moment, the basic block information seems to be
11784 corrupt by this stage of the compilation. */
11785 int base_cost = 50;
11786 rtx next = next_nonnote_insn (insn);
11788 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11791 switch (GET_CODE (insn))
11794 /* It will always be better to place the table before the label, rather than after it. */
11803 return base_cost - 10;
11806 return base_cost + 10;
11810 /* Find the best place in the insn stream in the range
11811 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11812 Create the barrier by inserting a jump and add a new fix entry for it. */
11815 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11817 HOST_WIDE_INT count = 0;
11819 rtx from = fix->insn;
11820 /* The instruction after which we will insert the jump. */
11821 rtx selected = NULL;
11823 /* The address at which the jump instruction will be placed. */
11824 HOST_WIDE_INT selected_address;
11826 HOST_WIDE_INT max_count = max_address - fix->address;
11827 rtx label = gen_label_rtx ();
11829 selected_cost = arm_barrier_cost (from);
11830 selected_address = fix->address;
11832 while (from && count < max_count)
11837 /* This code shouldn't have been called if there was a natural barrier within range. */
11839 gcc_assert (GET_CODE (from) != BARRIER);
11841 /* Count the length of this insn. */
11842 count += get_attr_length (from);
11844 /* If there is a jump table, add its length. */
11845 tmp = is_jump_table (from);
11848 count += get_jump_table_size (tmp);
11850 /* Jump tables aren't in a basic block, so base the cost on
11851 the dispatch insn. If we select this location, we will
11852 still put the pool after the table. */
11853 new_cost = arm_barrier_cost (from);
11855 if (count < max_count
11856 && (!selected || new_cost <= selected_cost))
11859 selected_cost = new_cost;
11860 selected_address = fix->address + count;
11863 /* Continue after the dispatch table. */
11864 from = NEXT_INSN (tmp);
11868 new_cost = arm_barrier_cost (from);
11870 if (count < max_count
11871 && (!selected || new_cost <= selected_cost))
11874 selected_cost = new_cost;
11875 selected_address = fix->address + count;
11878 from = NEXT_INSN (from);
11881 /* Make sure that we found a place to insert the jump. */
11882 gcc_assert (selected);
11884 /* Make sure we do not split a call and its corresponding
11885 CALL_ARG_LOCATION note. */
11886 if (CALL_P (selected))
11888 rtx next = NEXT_INSN (selected);
11889 if (next && NOTE_P (next)
11890 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11894 /* Create a new JUMP_INSN that branches around a barrier. */
11895 from = emit_jump_insn_after (gen_jump (label), selected);
11896 JUMP_LABEL (from) = label;
11897 barrier = emit_barrier_after (from);
11898 emit_label_after (label, barrier);
11900 /* Create a minipool barrier entry for the new barrier. */
11901 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11902 new_fix->insn = barrier;
11903 new_fix->address = selected_address;
11904 new_fix->next = fix->next;
11905 fix->next = new_fix;
11910 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
11913 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11915 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11918 fix->address = address;
11921 if (minipool_fix_head != NULL)
11922 minipool_fix_tail->next = fix;
11924 minipool_fix_head = fix;
11926 minipool_fix_tail = fix;
11929 /* Record INSN, which will need fixing up to load a value from the
11930 minipool. ADDRESS is the offset of the insn since the start of the
11931 function; LOC is a pointer to the part of the insn which requires
11932 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
11935 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11936 enum machine_mode mode, rtx value)
11938 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11941 fix->address = address;
11944 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11945 fix->value = value;
11946 fix->forwards = get_attr_pool_range (insn);
11947 fix->backwards = get_attr_neg_pool_range (insn);
11948 fix->minipool = NULL;
11950 /* If an insn doesn't have a range defined for it, then it isn't
11951 expecting to be reworked by this code. Better to stop now than
11952 to generate duff assembly code. */
11953 gcc_assert (fix->forwards || fix->backwards);
11955 /* If an entry requires 8-byte alignment then assume all constant pools
11956 require 4 bytes of padding. Trying to do this later on a per-pool
11957 basis is awkward because existing pool entries have to be modified. */
11958 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11963 fprintf (dump_file,
11964 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11965 GET_MODE_NAME (mode),
11966 INSN_UID (insn), (unsigned long) address,
11967 -1 * (long)fix->backwards, (long)fix->forwards);
11968 arm_print_value (dump_file, fix->value);
11969 fprintf (dump_file, "\n");
11972 /* Add it to the chain of fixes. */
11975 if (minipool_fix_head != NULL)
11976 minipool_fix_tail->next = fix;
11978 minipool_fix_head = fix;
11980 minipool_fix_tail = fix;
11983 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11984 Returns the number of insns needed, or 99 if we don't know how to generate it inline. */
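/* For example (illustrative): a DImode value whose 32-bit halves are
both 1 costs 1 insn per half, so the function returns 2. */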
11987 arm_const_double_inline_cost (rtx val)
11989 rtx lowpart, highpart;
11990 enum machine_mode mode;
11992 mode = GET_MODE (val);
11994 if (mode == VOIDmode)
11997 gcc_assert (GET_MODE_SIZE (mode) == 8);
11999 lowpart = gen_lowpart (SImode, val);
12000 highpart = gen_highpart_mode (SImode, mode, val);
12002 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12003 gcc_assert (GET_CODE (highpart) == CONST_INT);
12005 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12006 NULL_RTX, NULL_RTX, 0, 0)
12007 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12008 NULL_RTX, NULL_RTX, 0, 0));
12011 /* Return true if it is worthwhile to split a 64-bit constant into two
12012 32-bit operations. This is the case if optimizing for size, or
12013 if we have load delay slots, or if one 32-bit part can be done with
12014 a single data operation. */
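/* For example (illustrative): a 64-bit value with high part 0xff000000
and low part 0x00000001 qualifies, since each part is a valid
immediate for a single data-processing insn. */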
12016 arm_const_double_by_parts (rtx val)
12018 enum machine_mode mode = GET_MODE (val);
12021 if (optimize_size || arm_ld_sched)
12024 if (mode == VOIDmode)
12027 part = gen_highpart_mode (SImode, mode, val);
12029 gcc_assert (GET_CODE (part) == CONST_INT);
12031 if (const_ok_for_arm (INTVAL (part))
12032 || const_ok_for_arm (~INTVAL (part)))
12035 part = gen_lowpart (SImode, val);
12037 gcc_assert (GET_CODE (part) == CONST_INT);
12039 if (const_ok_for_arm (INTVAL (part))
12040 || const_ok_for_arm (~INTVAL (part)))
12046 /* Return true if it is possible to inline both the high and low parts
12047 of a 64-bit constant into 32-bit data processing instructions. */
12049 arm_const_double_by_immediates (rtx val)
12051 enum machine_mode mode = GET_MODE (val);
12054 if (mode == VOIDmode)
12057 part = gen_highpart_mode (SImode, mode, val);
12059 gcc_assert (GET_CODE (part) == CONST_INT);
12061 if (!const_ok_for_arm (INTVAL (part)))
12064 part = gen_lowpart (SImode, val);
12066 gcc_assert (GET_CODE (part) == CONST_INT);
12068 if (!const_ok_for_arm (INTVAL (part)))
12074 /* Scan INSN and note any of its operands that need fixing.
12075 If DO_PUSHES is false we do not actually push any of the fixups
12076 needed. The function returns TRUE if any fixups were needed/pushed.
12077 This is used by arm_memory_load_p() which needs to know about loads
12078 of constants that will be converted into minipool loads. */
12080 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12082 bool result = false;
12085 extract_insn (insn);
12087 if (!constrain_operands (1))
12088 fatal_insn_not_found (insn);
12090 if (recog_data.n_alternatives == 0)
12093 /* Fill in recog_op_alt with information about the constraints of this insn. */
12095 preprocess_constraints ();
12097 for (opno = 0; opno < recog_data.n_operands; opno++)
12099 /* Things we need to fix can only occur in inputs. */
12100 if (recog_data.operand_type[opno] != OP_IN)
12103 /* If this alternative is a memory reference, then any mention
12104 of constants in this alternative is really to fool reload
12105 into allowing us to accept one there. We need to fix them up
12106 now so that we output the right code. */
12107 if (recog_op_alt[opno][which_alternative].memory_ok)
12109 rtx op = recog_data.operand[opno];
12111 if (CONSTANT_P (op))
12114 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12115 recog_data.operand_mode[opno], op);
12118 else if (GET_CODE (op) == MEM
12119 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12120 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12124 rtx cop = avoid_constant_pool_reference (op);
12126 /* Casting the address of something to a mode narrower
12127 than a word can cause avoid_constant_pool_reference()
12128 to return the pool reference itself. That's no good to
12129 us here. Let's just hope that we can use the
12130 constant pool value directly. */
12132 cop = get_pool_constant (XEXP (op, 0));
12134 push_minipool_fix (insn, address,
12135 recog_data.operand_loc[opno],
12136 recog_data.operand_mode[opno], cop);
12147 /* Convert instructions to their cc-clobbering variant if possible, since
12148 that allows us to use smaller encodings. */
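/* For example (illustrative): when the condition flags are dead at an
insn, a 32-bit Thumb-2 'add r0, r0, r1' can be rewritten as the
flag-setting 'adds r0, r0, r1', which has a 16-bit encoding. */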
12151 thumb2_reorg (void)
12156 INIT_REG_SET (&live);
12158 /* We are freeing block_for_insn in the toplev to keep compatibility
12159 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12160 compute_bb_for_insn ();
12167 COPY_REG_SET (&live, DF_LR_OUT (bb));
12168 df_simulate_initialize_backwards (bb, &live);
12169 FOR_BB_INSNS_REVERSE (bb, insn)
12171 if (NONJUMP_INSN_P (insn)
12172 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12174 rtx pat = PATTERN (insn);
12175 if (GET_CODE (pat) == SET
12176 && low_register_operand (XEXP (pat, 0), SImode)
12177 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12178 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12179 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12181 rtx dst = XEXP (pat, 0);
12182 rtx src = XEXP (pat, 1);
12183 rtx op0 = XEXP (src, 0);
12184 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12185 ? XEXP (src, 1) : NULL);
12187 if (rtx_equal_p (dst, op0)
12188 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12190 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12191 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12192 rtvec vec = gen_rtvec (2, pat, clobber);
12194 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12195 INSN_CODE (insn) = -1;
12197 /* We can also handle a commutative operation where the
12198 second operand matches the destination. */
12199 else if (op1 && rtx_equal_p (dst, op1))
12201 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12202 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12205 src = copy_rtx (src);
12206 XEXP (src, 0) = op1;
12207 XEXP (src, 1) = op0;
12208 pat = gen_rtx_SET (VOIDmode, dst, src);
12209 vec = gen_rtvec (2, pat, clobber);
12210 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12211 INSN_CODE (insn) = -1;
12216 if (NONDEBUG_INSN_P (insn))
12217 df_simulate_one_insn_backwards (bb, insn, &live);
12221 CLEAR_REG_SET (&live);
12224 /* GCC puts the pool in the wrong place for ARM, since we can only
12225 load addresses a limited distance around the pc. We do some
12226 special munging to move the constant pool values to the correct
12227 point in the code. */
12232 HOST_WIDE_INT address = 0;
12238 minipool_fix_head = minipool_fix_tail = NULL;
12240 /* The first insn must always be a note, or the code below won't
12241 scan it properly. */
12242 insn = get_insns ();
12243 gcc_assert (GET_CODE (insn) == NOTE);
12246 /* Scan all the insns and record the operands that will need fixing. */
12247 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12249 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12250 && (arm_cirrus_insn_p (insn)
12251 || GET_CODE (insn) == JUMP_INSN
12252 || arm_memory_load_p (insn)))
12253 cirrus_reorg (insn);
12255 if (GET_CODE (insn) == BARRIER)
12256 push_minipool_barrier (insn, address);
12257 else if (INSN_P (insn))
12261 note_invalid_constants (insn, address, true);
12262 address += get_attr_length (insn);
12264 /* If the insn is a vector jump, add the size of the table
12265 and skip the table. */
12266 if ((table = is_jump_table (insn)) != NULL)
12268 address += get_jump_table_size (table);
12274 fix = minipool_fix_head;
12276 /* Now scan the fixups and perform the required changes. */
12281 Mfix * last_added_fix;
12282 Mfix * last_barrier = NULL;
12285 /* Skip any further barriers before the next fix. */
12286 while (fix && GET_CODE (fix->insn) == BARRIER)
12289 /* No more fixes. */
12293 last_added_fix = NULL;
12295 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12297 if (GET_CODE (ftmp->insn) == BARRIER)
12299 if (ftmp->address >= minipool_vector_head->max_address)
12302 last_barrier = ftmp;
12304 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12307 last_added_fix = ftmp; /* Keep track of the last fix added. */
12310 /* If we found a barrier, drop back to that; any fixes that we
12311 could have reached but come after the barrier will now go in
12312 the next mini-pool. */
12313 if (last_barrier != NULL)
12315 /* Reduce the refcount for those fixes that won't go into this pool after all. */
12317 for (fdel = last_barrier->next;
12318 fdel && fdel != ftmp;
12321 fdel->minipool->refcount--;
12322 fdel->minipool = NULL;
12325 ftmp = last_barrier;
12329 /* ftmp is the first fix that we can't fit into this pool and
12330 there are no natural barriers that we could use. Insert a
12331 new barrier in the code somewhere between the previous
12332 fix and this one, and arrange to jump around it. */
12333 HOST_WIDE_INT max_address;
12335 /* The last item on the list of fixes must be a barrier, so
12336 we can never run off the end of the list of fixes without
12337 last_barrier being set. */
12340 max_address = minipool_vector_head->max_address;
12341 /* Check that there isn't another fix that is in range that
12342 we couldn't fit into this pool because the pool was
12343 already too large: we need to put the pool before such an
12344 instruction. The pool itself may come just after the
12345 fix because create_fix_barrier also allows space for a
12346 jump instruction. */
12347 if (ftmp->address < max_address)
12348 max_address = ftmp->address + 1;
12350 last_barrier = create_fix_barrier (last_added_fix, max_address);
12353 assign_minipool_offsets (last_barrier);
12357 if (GET_CODE (ftmp->insn) != BARRIER
12358 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12365 /* Scan over the fixes we have identified for this pool, fixing them
12366 up and adding the constants to the pool itself. */
12367 for (this_fix = fix; this_fix && ftmp != this_fix;
12368 this_fix = this_fix->next)
12369 if (GET_CODE (this_fix->insn) != BARRIER)
12372 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12373 minipool_vector_label),
12374 this_fix->minipool->offset);
12375 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12378 dump_minipool (last_barrier->insn);
12382 /* From now on we must synthesize any constants that we can't handle
12383 directly. This can happen if the RTL gets split during final
12384 instruction generation. */
12385 after_arm_reorg = 1;
12387 /* Free the minipool memory. */
12388 obstack_free (&minipool_obstack, minipool_startobj);
12391 /* Routines to output assembly language. */
12393 /* If the rtx is the correct value then return the string of the number.
12394 In this way we can ensure that valid double constants are generated even
12395 when cross compiling. */
12397 fp_immediate_constant (rtx x)
12402 if (!fp_consts_inited)
12405 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12406 for (i = 0; i < 8; i++)
12407 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12408 return strings_fp[i];
12410 gcc_unreachable ();
12413 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12414 static const char *
12415 fp_const_from_val (REAL_VALUE_TYPE *r)
12419 if (!fp_consts_inited)
12422 for (i = 0; i < 8; i++)
12423 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12424 return strings_fp[i];
12426 gcc_unreachable ();
12429 /* Output the operands of a LDM/STM instruction to STREAM.
12430 MASK is the ARM register set mask of which only bits 0-15 are important.
12431 REG is the base register, either the frame pointer or the stack pointer;
12432 INSTR is the possibly suffixed load or store instruction.
12433 RFE is nonzero if the instruction should also copy spsr to cpsr. */
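/* For example (illustrative): a MASK of 0x500f selects r0-r3, ip and
lr, so the register list printed is {r0, r1, r2, r3, ip, lr}. */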
12436 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12437 unsigned long mask, int rfe)
12440 bool not_first = FALSE;
12442 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12443 fputc ('\t', stream);
12444 asm_fprintf (stream, instr, reg);
12445 fputc ('{', stream);
12447 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12448 if (mask & (1 << i))
12451 fprintf (stream, ", ");
12453 asm_fprintf (stream, "%r", i);
12458 fprintf (stream, "}^\n");
12460 fprintf (stream, "}\n");
12464 /* Output a FLDMD instruction to STREAM.
12465 BASE is the register containing the address.
12466 REG and COUNT specify the register range.
12467 Extra registers may be added to avoid hardware bugs.
12469 We output FLDMD even for ARMv5 VFP implementations. Although
12470 FLDMD is technically not supported until ARMv6, it is believed
12471 that all VFP implementations support its use in this context. */
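/* For example (illustrative): REG = 0 with COUNT = 20 is emitted as an
fldmfdd of d0-d15 followed by a second fldmfdd of d16-d19. */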
12474 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12478 /* Workaround ARM10 VFPr1 bug. */
12479 if (count == 2 && !arm_arch6)
12486 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12487 load into multiple parts if we have to handle more than 16 registers. */
12490 vfp_output_fldmd (stream, base, reg, 16);
12491 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12495 fputc ('\t', stream);
12496 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12498 for (i = reg; i < reg + count; i++)
12501 fputs (", ", stream);
12502 asm_fprintf (stream, "d%d", i);
12504 fputs ("}\n", stream);
12509 /* Output the assembly for a store multiple. */
12512 vfp_output_fstmd (rtx * operands)
12519 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12520 p = strlen (pattern);
12522 gcc_assert (GET_CODE (operands[1]) == REG);
12524 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12525 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12527 p += sprintf (&pattern[p], ", d%d", base + i);
12529 strcpy (&pattern[p], "}");
12531 output_asm_insn (pattern, operands);
12536 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12537 number of bytes pushed. */
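/* A usage sketch (illustrative): vfp_emit_fstmd (FIRST_VFP_REGNUM, 4)
pushes d0-d3 and returns 32, i.e. 4 registers of 8 bytes each. */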
12540 vfp_emit_fstmd (int base_reg, int count)
12547 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12548 register pairs are stored by a store multiple insn. We avoid this
12549 by pushing an extra pair. */
12550 if (count == 2 && !arm_arch6)
12552 if (base_reg == LAST_VFP_REGNUM - 3)
12557 /* FSTMD may not store more than 16 doubleword registers at once. Split
12558 larger stores into multiple parts (up to a maximum of two, in practice). */
12563 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
12565 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12566 saved += vfp_emit_fstmd (base_reg, 16);
12570 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12571 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12573 reg = gen_rtx_REG (DFmode, base_reg);
12576 XVECEXP (par, 0, 0)
12577 = gen_rtx_SET (VOIDmode,
12580 gen_rtx_PRE_MODIFY (Pmode,
12583 (stack_pointer_rtx,
12586 gen_rtx_UNSPEC (BLKmode,
12587 gen_rtvec (1, reg),
12588 UNSPEC_PUSH_MULT));
12590 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12591 plus_constant (stack_pointer_rtx, -(count * 8)));
12592 RTX_FRAME_RELATED_P (tmp) = 1;
12593 XVECEXP (dwarf, 0, 0) = tmp;
12595 tmp = gen_rtx_SET (VOIDmode,
12596 gen_frame_mem (DFmode, stack_pointer_rtx),
12598 RTX_FRAME_RELATED_P (tmp) = 1;
12599 XVECEXP (dwarf, 0, 1) = tmp;
12601 for (i = 1; i < count; i++)
12603 reg = gen_rtx_REG (DFmode, base_reg);
12605 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12607 tmp = gen_rtx_SET (VOIDmode,
12608 gen_frame_mem (DFmode,
12609 plus_constant (stack_pointer_rtx,
12612 RTX_FRAME_RELATED_P (tmp) = 1;
12613 XVECEXP (dwarf, 0, i + 1) = tmp;
12616 par = emit_insn (par);
12617 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12618 RTX_FRAME_RELATED_P (par) = 1;
12623 /* Emit a call instruction with pattern PAT. ADDR is the address of
12624 the call target. */
12627 arm_emit_call_insn (rtx pat, rtx addr)
12631 insn = emit_call_insn (pat);
12633 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12634 If the call might use such an entry, add a use of the PIC register
12635 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12636 if (TARGET_VXWORKS_RTP
12638 && GET_CODE (addr) == SYMBOL_REF
12639 && (SYMBOL_REF_DECL (addr)
12640 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12641 : !SYMBOL_REF_LOCAL_P (addr)))
12643 require_pic_register ();
12644 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12648 /* Output a 'call' insn. */
12650 output_call (rtx *operands)
12652 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12654 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12655 if (REGNO (operands[0]) == LR_REGNUM)
12657 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12658 output_asm_insn ("mov%?\t%0, %|lr", operands);
12661 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12663 if (TARGET_INTERWORK || arm_arch4t)
12664 output_asm_insn ("bx%?\t%0", operands);
12666 output_asm_insn ("mov%?\t%|pc, %0", operands);
12671 /* Output a 'call' insn that is a reference in memory. This is
12672 disabled for ARMv5 and we prefer a blx instead because otherwise
12673 there's a significant performance overhead. */
12675 output_call_mem (rtx *operands)
12677 gcc_assert (!arm_arch5);
12678 if (TARGET_INTERWORK)
12680 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12681 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12682 output_asm_insn ("bx%?\t%|ip", operands);
12684 else if (regno_use_in (LR_REGNUM, operands[0]))
12686 /* LR is used in the memory address. We load the address in the
12687 first instruction. It's safe to use IP as the target of the
12688 load since the call will kill it anyway. */
12689 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12690 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12692 output_asm_insn ("bx%?\t%|ip", operands);
12694 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12698 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12699 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12706 /* Output a move from arm registers to an fpa register.
12707 OPERANDS[0] is an fpa register.
12708 OPERANDS[1] is the first register of an arm register pair. */
12710 output_mov_long_double_fpa_from_arm (rtx *operands)
12712 int arm_reg0 = REGNO (operands[1]);
12715 gcc_assert (arm_reg0 != IP_REGNUM);
12717 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12718 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12719 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12721 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12722 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12727 /* Output a move from an fpa register to arm registers.
12728 OPERANDS[0] is the first register of an arm register pair.
12729 OPERANDS[1] is an fpa register. */
12731 output_mov_long_double_arm_from_fpa (rtx *operands)
12733 int arm_reg0 = REGNO (operands[0]);
12736 gcc_assert (arm_reg0 != IP_REGNUM);
12738 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12739 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12740 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12742 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12743 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12747 /* Output a move from arm registers to arm registers of a long double.
12748 OPERANDS[0] is the destination.
12749 OPERANDS[1] is the source. */
12751 output_mov_long_double_arm_from_arm (rtx *operands)
12753 /* We have to be careful here because the two might overlap. */
12754 int dest_start = REGNO (operands[0]);
12755 int src_start = REGNO (operands[1]);
12759 if (dest_start < src_start)
12761 for (i = 0; i < 3; i++)
12763 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12764 ops[1] = gen_rtx_REG (SImode, src_start + i);
12765 output_asm_insn ("mov%?\t%0, %1", ops);
12770 for (i = 2; i >= 0; i--)
12772 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12773 ops[1] = gen_rtx_REG (SImode, src_start + i);
12774 output_asm_insn ("mov%?\t%0, %1", ops);
12782 arm_emit_movpair (rtx dest, rtx src)
12784 /* If the src is an immediate, simplify it. */
12785 if (CONST_INT_P (src))
12787 HOST_WIDE_INT val = INTVAL (src);
12788 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12789 if ((val >> 16) & 0x0000ffff)
12790 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12792 GEN_INT ((val >> 16) & 0x0000ffff));
12795 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12796 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
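/* For example (illustrative): called with SRC = 0x12345678, the code
above emits a set of the low half (#0x5678) followed by a ZERO_EXTRACT
set of the high half (#0x1234), i.e. a movw/movt pair. */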
12799 /* Output a move from arm registers to an fpa register.
12800 OPERANDS[0] is an fpa register.
12801 OPERANDS[1] is the first register of an arm register pair. */
12803 output_mov_double_fpa_from_arm (rtx *operands)
12805 int arm_reg0 = REGNO (operands[1]);
12808 gcc_assert (arm_reg0 != IP_REGNUM);
12810 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12811 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12812 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12813 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12817 /* Output a move from an fpa register to arm registers.
12818 OPERANDS[0] is the first register of an arm register pair.
12819 OPERANDS[1] is an fpa register. */
12821 output_mov_double_arm_from_fpa (rtx *operands)
12823 int arm_reg0 = REGNO (operands[0]);
12826 gcc_assert (arm_reg0 != IP_REGNUM);
12828 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12829 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12830 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12831 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12835 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
12838 output_move_double (rtx *operands)
12840 enum rtx_code code0 = GET_CODE (operands[0]);
12841 enum rtx_code code1 = GET_CODE (operands[1]);
12846 unsigned int reg0 = REGNO (operands[0]);
12848 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12850 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12852 switch (GET_CODE (XEXP (operands[1], 0)))
12856 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
12857 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12859 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12863 gcc_assert (TARGET_LDRD);
12864 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12869 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12871 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12876 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12878 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12882 gcc_assert (TARGET_LDRD);
12883 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12888 /* Auto-increment addressing modes should never have overlapping
12889 base and destination registers, and overlapping index registers
12890 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
12892 otherops[0] = operands[0];
12893 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12894 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12896 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12898 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12900 /* Registers overlap so split out the increment. */
12901 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12902 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12906 /* Use a single insn if we can.
12907 FIXME: IWMMXT allows offsets larger than ldrd can
12908 handle; fix these up with a pair of ldr. */
12910 || GET_CODE (otherops[2]) != CONST_INT
12911 || (INTVAL (otherops[2]) > -256
12912 && INTVAL (otherops[2]) < 256))
12913 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12916 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12917 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12923 /* Use a single insn if we can.
12924 FIXME: IWMMXT allows offsets larger than ldrd can handle;
12925 fix these up with a pair of ldr. */
12927 || GET_CODE (otherops[2]) != CONST_INT
12928 || (INTVAL (otherops[2]) > -256
12929 && INTVAL (otherops[2]) < 256))
12930 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12933 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12934 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12941 /* We might be able to use ldrd %0, %1 here. However the range is
12942 different to ldr/adr, and it is broken on some ARMv7-M
12943 implementations. */
12944 /* Use the second register of the pair to avoid problematic overlapping. */
12946 otherops[1] = operands[1];
12947 output_asm_insn ("adr%?\t%0, %1", otherops);
12948 operands[1] = otherops[0];
12950 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12952 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12955 /* ??? This needs checking for thumb2. */
12957 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12958 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12960 otherops[0] = operands[0];
12961 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12962 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12964 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12966 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12968 switch ((int) INTVAL (otherops[2]))
12971 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12976 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12981 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12985 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
12986 operands[1] = otherops[0];
12988 && (GET_CODE (otherops[2]) == REG
12990 || (GET_CODE (otherops[2]) == CONST_INT
12991 && INTVAL (otherops[2]) > -256
12992 && INTVAL (otherops[2]) < 256)))
12994 if (reg_overlap_mentioned_p (operands[0],
12998 /* Swap base and index registers over to
12999 avoid a conflict. */
13001 otherops[1] = otherops[2];
13004 /* If both registers conflict, it will usually
13005 have been fixed by a splitter. */
13006 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13007 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13009 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13010 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13014 otherops[0] = operands[0];
13015 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13020 if (GET_CODE (otherops[2]) == CONST_INT)
13022 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13023 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13025 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13028 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13031 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13034 return "ldr%(d%)\t%0, [%1]";
13036 return "ldm%(ia%)\t%1, %M0";
13040 otherops[1] = adjust_address (operands[1], SImode, 4);
13041 /* Take care of overlapping base/data reg. */
13042 if (reg_mentioned_p (operands[0], operands[1]))
13044 output_asm_insn ("ldr%?\t%0, %1", otherops);
13045 output_asm_insn ("ldr%?\t%0, %1", operands);
13049 output_asm_insn ("ldr%?\t%0, %1", operands);
13050 output_asm_insn ("ldr%?\t%0, %1", otherops);
13057 /* Constraints should ensure this. */
13058 gcc_assert (code0 == MEM && code1 == REG);
13059 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13061 switch (GET_CODE (XEXP (operands[0], 0)))
13065 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13067 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13071 gcc_assert (TARGET_LDRD);
13072 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13077 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13079 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13084 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13086 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13090 gcc_assert (TARGET_LDRD);
13091 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13096 otherops[0] = operands[1];
13097 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13098 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13100 /* IWMMXT allows offsets larger than ldrd can handle;
13101 fix these up with a pair of ldr. */
13103 && GET_CODE (otherops[2]) == CONST_INT
13104 && (INTVAL (otherops[2]) <= -256
13105 || INTVAL (otherops[2]) >= 256))
13107 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13109 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13110 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13114 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13115 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13118 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13119 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13121 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13125 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13126 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13128 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13131 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13137 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13143 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13148 && (GET_CODE (otherops[2]) == REG
13150 || (GET_CODE (otherops[2]) == CONST_INT
13151 && INTVAL (otherops[2]) > -256
13152 && INTVAL (otherops[2]) < 256)))
13154 otherops[0] = operands[1];
13155 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13156 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13162 otherops[0] = adjust_address (operands[0], SImode, 4);
13163 otherops[1] = operands[1];
13164 output_asm_insn ("str%?\t%1, %0", operands);
13165 output_asm_insn ("str%?\t%H1, %0", otherops);
13172 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13173 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13176 output_move_quad (rtx *operands)
13178 if (REG_P (operands[0]))
13180 /* Load, or reg->reg move. */
13182 if (MEM_P (operands[1]))
13184 switch (GET_CODE (XEXP (operands[1], 0)))
13187 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13192 output_asm_insn ("adr%?\t%0, %1", operands);
13193 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13197 gcc_unreachable ();
13205 gcc_assert (REG_P (operands[1]));
13207 dest = REGNO (operands[0]);
13208 src = REGNO (operands[1]);
13210 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
13213 for (i = 0; i < 4; i++)
13215 ops[0] = gen_rtx_REG (SImode, dest + i);
13216 ops[1] = gen_rtx_REG (SImode, src + i);
13217 output_asm_insn ("mov%?\t%0, %1", ops);
13220 for (i = 3; i >= 0; i--)
13222 ops[0] = gen_rtx_REG (SImode, dest + i);
13223 ops[1] = gen_rtx_REG (SImode, src + i);
13224 output_asm_insn ("mov%?\t%0, %1", ops);
13230 gcc_assert (MEM_P (operands[0]));
13231 gcc_assert (REG_P (operands[1]));
13232 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13234 switch (GET_CODE (XEXP (operands[0], 0)))
13237 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13241 gcc_unreachable ();
13248 /* Output a VFP load or store instruction. */
13251 output_move_vfp (rtx *operands)
13253 rtx reg, mem, addr, ops[2];
13254 int load = REG_P (operands[0]);
13255 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13256 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13259 enum machine_mode mode;
13261 reg = operands[!load];
13262 mem = operands[load];
13264 mode = GET_MODE (reg);
13266 gcc_assert (REG_P (reg));
13267 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13268 gcc_assert (mode == SFmode
13272 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13273 gcc_assert (MEM_P (mem));
13275 addr = XEXP (mem, 0);
13277 switch (GET_CODE (addr))
13280 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13281 ops[0] = XEXP (addr, 0);
13286 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13287 ops[0] = XEXP (addr, 0);
13292 templ = "f%s%c%%?\t%%%s0, %%1%s";
13298 sprintf (buff, templ,
13299 load ? "ld" : "st",
13302 integer_p ? "\t%@ int" : "");
13303 output_asm_insn (buff, ops);
13308 /* Output a Neon quad-word load or store, or a load or store for
13309 larger structure modes.
13311 WARNING: The ordering of elements is weird in big-endian mode,
13312 because we use VSTM, as required by the EABI. GCC RTL defines
13313 element ordering based on in-memory order. This can differ
13314 from the architectural ordering of elements within a NEON register.
13315 The intrinsics defined in arm_neon.h use the NEON register element
13316 ordering, not the GCC RTL element ordering.
13318 For example, the in-memory ordering of a big-endian quadword
13319 vector with 16-bit elements when stored from register pair {d0,d1}
13320 will be (lowest address first, d0[N] is NEON register element N):
13322 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13324 When necessary, quadword registers (dN, dN+1) are moved to ARM
13325 registers from rN in the order:
13327 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13329 So that STM/LDM can be used on vectors in ARM registers, and the
13330 same memory layout will result as if VSTM/VLDM were used. */
13333 output_move_neon (rtx *operands)
13335 rtx reg, mem, addr, ops[2];
13336 int regno, load = REG_P (operands[0]);
13339 enum machine_mode mode;
13341 reg = operands[!load];
13342 mem = operands[load];
13344 mode = GET_MODE (reg);
13346 gcc_assert (REG_P (reg));
13347 regno = REGNO (reg);
13348 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13349 || NEON_REGNO_OK_FOR_QUAD (regno));
13350 gcc_assert (VALID_NEON_DREG_MODE (mode)
13351 || VALID_NEON_QREG_MODE (mode)
13352 || VALID_NEON_STRUCT_MODE (mode));
13353 gcc_assert (MEM_P (mem));
13355 addr = XEXP (mem, 0);
13357 /* Strip off const from addresses like (const (plus (...))). */
13358 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13359 addr = XEXP (addr, 0);
13361 switch (GET_CODE (addr))
13364 templ = "v%smia%%?\t%%0!, %%h1";
13365 ops[0] = XEXP (addr, 0);
13370 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13371 templ = "v%smdb%%?\t%%0!, %%h1";
13372 ops[0] = XEXP (addr, 0);
13377 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13378 gcc_unreachable ();
13383 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13386 for (i = 0; i < nregs; i++)
13388 /* We're only using DImode here because it's a convenient size. */
13389 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13390 ops[1] = adjust_address (mem, DImode, 8 * i);
13391 if (reg_overlap_mentioned_p (ops[0], mem))
13393 gcc_assert (overlap == -1);
13398 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13399 output_asm_insn (buff, ops);
13404 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13405 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13406 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13407 output_asm_insn (buff, ops);
13414 templ = "v%smia%%?\t%%m0, %%h1";
13419 sprintf (buff, templ, load ? "ld" : "st");
13420 output_asm_insn (buff, ops);
13425 /* Compute and return the length of neon_mov<mode>, where <mode> is
13426 one of VSTRUCT modes: EI, OI, CI or XI. */
13428 arm_attr_length_move_neon (rtx insn)
13430 rtx reg, mem, addr;
13432 enum machine_mode mode;
13434 extract_insn_cached (insn);
13436 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13438 mode = GET_MODE (recog_data.operand[0]);
13449 gcc_unreachable ();
13453 load = REG_P (recog_data.operand[0]);
13454 reg = recog_data.operand[!load];
13455 mem = recog_data.operand[load];
13457 gcc_assert (MEM_P (mem));
13459 mode = GET_MODE (reg);
13460 addr = XEXP (mem, 0);
13462 /* Strip off const from addresses like (const (plus (...))). */
13463 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13464 addr = XEXP (addr, 0);
13466 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13468 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13475 /* Return nonzero if the offset in the address is an immediate. Otherwise, return FALSE. */
13479 arm_address_offset_is_imm (rtx insn)
13483 extract_insn_cached (insn);
13485 if (REG_P (recog_data.operand[0]))
13488 mem = recog_data.operand[0];
13490 gcc_assert (MEM_P (mem));
13492 addr = XEXP (mem, 0);
13494 if (GET_CODE (addr) == REG
13495 || (GET_CODE (addr) == PLUS
13496 && GET_CODE (XEXP (addr, 0)) == REG
13497 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13503 /* Output an ADD r, s, #n where n may be too big for one instruction.
13504 If adding zero to one register, output nothing. */
13506 output_add_immediate (rtx *operands)
13508 HOST_WIDE_INT n = INTVAL (operands[2]);
13510 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13513 output_multi_immediate (operands,
13514 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13517 output_multi_immediate (operands,
13518 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13525 /* Output a multiple immediate operation.
13526 OPERANDS is the vector of operands referred to in the output patterns.
13527 INSTR1 is the output pattern to use for the first constant.
13528 INSTR2 is the output pattern to use for subsequent constants.
13529 IMMED_OP is the index of the constant slot in OPERANDS.
13530 N is the constant value. */
13531 static const char *
13532 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13533 int immed_op, HOST_WIDE_INT n)
13535 #if HOST_BITS_PER_WIDE_INT > 32
13541 /* Quick and easy output. */
13542 operands[immed_op] = const0_rtx;
13543 output_asm_insn (instr1, operands);
13548 const char * instr = instr1;
13550 /* Note that n is never zero here (which would give no output). */
13551 for (i = 0; i < 32; i += 2)
13555 operands[immed_op] = GEN_INT (n & (255 << i));
13556 output_asm_insn (instr, operands);
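	/* Illustrative only: output_add_immediate splitting n = 0x10001
	   for operands {r0, r1, #0x10001} reaches this loop twice and
	   emits

	       add	r0, r1, #1
	       add	r0, r0, #65536

	   INSTR1 is used for the first chunk, INSTR2 thereafter.  */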
13566 /* Return the name of a shifter operation. */
13567 static const char *
arm_shift_nmem (enum rtx_code code)
13573 return ARM_LSL_NAME;
13589 /* Return the appropriate ARM instruction for the operation code.
13590 The returned result should not be overwritten. OP is the rtx of the
operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
is shifted.  */
13594 arithmetic_instr (rtx op, int shift_first_arg)
13596 switch (GET_CODE (op))
13602 return shift_first_arg ? "rsb" : "sub";
return arm_shift_nmem (GET_CODE (op));
13620 gcc_unreachable ();
13624 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13625 for the operation code. The returned result should not be overwritten.
13626 OP is the rtx code of the shift.
On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
shift.  */
13629 static const char *
13630 shift_op (rtx op, HOST_WIDE_INT *amountp)
13633 enum rtx_code code = GET_CODE (op);
13635 switch (GET_CODE (XEXP (op, 1)))
13643 *amountp = INTVAL (XEXP (op, 1));
13647 gcc_unreachable ();
13653 gcc_assert (*amountp != -1);
13654 *amountp = 32 - *amountp;
13657 /* Fall through. */
mnem = arm_shift_nmem (code);
13667 /* We never have to worry about the amount being other than a
13668 power of 2, since this case can never be reloaded from a reg. */
13669 gcc_assert (*amountp != -1);
13670 *amountp = int_log2 (*amountp);
13671 return ARM_LSL_NAME;
13674 gcc_unreachable ();
13677 if (*amountp != -1)
13679 /* This is not 100% correct, but follows from the desire to merge
13680 multiplication by a power of 2 with the recognizer for a
13681 shift. >=32 is not a valid shift for "lsl", so we must try and
13682 output a shift that produces the correct arithmetical result.
13683 Using lsr #32 is identical except for the fact that the carry bit
13684 is not set correctly if we set the flags; but we never use the
13685 carry bit from such an operation, so we can ignore that. */
13686 if (code == ROTATERT)
13687 /* Rotate is just modulo 32. */
13689 else if (*amountp != (*amountp & 31))
13691 if (code == ASHIFT)
13696 /* Shifts of 0 are no-ops. */
13704 /* Obtain the shift from the POWER of two. */
13706 static HOST_WIDE_INT
13707 int_log2 (HOST_WIDE_INT power)
13709 HOST_WIDE_INT shift = 0;
13711 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13713 gcc_assert (shift <= 31);
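  /* For example, int_log2 (8) leaves shift == 3, since (1 << 3) & 8 is
     non-zero; shift_op uses this to turn a multiply by a power of two
     into an "lsl" by that amount.  */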
13720 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13721 because /bin/as is horribly restrictive. The judgement about
13722 whether or not each character is 'printable' (and can be output as
13723 is) or not (and must be printed with an octal escape) must be made
13724 with reference to the *host* character set -- the situation is
13725 similar to that discussed in the comments above pp_c_char in
13726 c-pretty-print.c. */
13728 #define MAX_ASCII_LEN 51
13731 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13734 int len_so_far = 0;
13736 fputs ("\t.ascii\t\"", stream);
13738 for (i = 0; i < len; i++)
13742 if (len_so_far >= MAX_ASCII_LEN)
13744 fputs ("\"\n\t.ascii\t\"", stream);
13750 if (c == '\\' || c == '\"')
13752 putc ('\\', stream);
13760 fprintf (stream, "\\%03o", c);
13765 fputs ("\"\n", stream);
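/* Illustrative only: given the bytes 'a', '"', 'b', '\0' the loop above
   emits

	.ascii	"a\"b\000"

   quoting the '"' and octal-escaping the unprintable NUL.  */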
13768 /* Compute the register save mask for registers 0 through 12
13769 inclusive. This code is used by arm_compute_save_reg_mask. */
13771 static unsigned long
13772 arm_compute_save_reg0_reg12_mask (void)
13774 unsigned long func_type = arm_current_func_type ();
13775 unsigned long save_reg_mask = 0;
13778 if (IS_INTERRUPT (func_type))
13780 unsigned int max_reg;
13781 /* Interrupt functions must not corrupt any registers,
13782 even call clobbered ones. If this is a leaf function
13783 we can just examine the registers used by the RTL, but
13784 otherwise we have to assume that whatever function is
13785 called might clobber anything, and so we have to save
13786 all the call-clobbered registers as well. */
13787 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13788 /* FIQ handlers have registers r8 - r12 banked, so
we only need to check r0 - r7.  Normal ISRs only
bank r14 and r15, so we must check up to r12.
13791 r13 is the stack pointer which is always preserved,
13792 so we do not need to consider it here. */
13797 for (reg = 0; reg <= max_reg; reg++)
13798 if (df_regs_ever_live_p (reg)
13799 || (! current_function_is_leaf && call_used_regs[reg]))
13800 save_reg_mask |= (1 << reg);
13802 /* Also save the pic base register if necessary. */
13804 && !TARGET_SINGLE_PIC_BASE
13805 && arm_pic_register != INVALID_REGNUM
13806 && crtl->uses_pic_offset_table)
13807 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
else if (IS_VOLATILE (func_type))
13811 /* For noreturn functions we historically omitted register saves
13812 altogether. However this really messes up debugging. As a
13813 compromise save just the frame pointers. Combined with the link
register saved elsewhere this should be sufficient to get
a backtrace.  */
13816 if (frame_pointer_needed)
13817 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13818 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13819 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13820 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13821 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13825 /* In the normal case we only need to save those registers
13826 which are call saved and which are used by this function. */
13827 for (reg = 0; reg <= 11; reg++)
13828 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13829 save_reg_mask |= (1 << reg);
13831 /* Handle the frame pointer as a special case. */
13832 if (frame_pointer_needed)
13833 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13835 /* If we aren't loading the PIC register,
13836 don't stack it even though it may be live. */
13838 && !TARGET_SINGLE_PIC_BASE
13839 && arm_pic_register != INVALID_REGNUM
13840 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13841 || crtl->uses_pic_offset_table))
13842 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13844 /* The prologue will copy SP into R0, so save it. */
13845 if (IS_STACKALIGN (func_type))
13846 save_reg_mask |= 1;
13849 /* Save registers so the exception handler can modify them. */
13850 if (crtl->calls_eh_return)
13856 reg = EH_RETURN_DATA_REGNO (i);
13857 if (reg == INVALID_REGNUM)
13859 save_reg_mask |= 1 << reg;
13863 return save_reg_mask;
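/* Illustrative only: a normal function that uses r4 and r7 and needs a
   frame pointer would leave here with
   save_reg_mask == (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM);
   LR and PC handling is added later by arm_compute_save_reg_mask.  */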
13867 /* Compute the number of bytes used to store the static chain register on the
13868 stack, above the stack frame. We need to know this accurately to get the
13869 alignment of the rest of the stack frame correct. */
static int
arm_compute_static_chain_stack_bytes (void)
13873 unsigned long func_type = arm_current_func_type ();
13874 int static_chain_stack_bytes = 0;
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (func_type)
      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13879 static_chain_stack_bytes = 4;
13881 return static_chain_stack_bytes;
13885 /* Compute a bit mask of which registers need to be
13886 saved on the stack for the current function.
13887 This is used by arm_get_frame_offsets, which may add extra registers. */
13889 static unsigned long
13890 arm_compute_save_reg_mask (void)
13892 unsigned int save_reg_mask = 0;
13893 unsigned long func_type = arm_current_func_type ();
13896 if (IS_NAKED (func_type))
13897 /* This should never really happen. */
13900 /* If we are creating a stack frame, then we must save the frame pointer,
13901 IP (which will hold the old stack pointer), LR and the PC. */
13902 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13904 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13907 | (1 << PC_REGNUM);
13909 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13911 /* Decide if we need to save the link register.
13912 Interrupt routines have their own banked link register,
13913 so they never need to save it.
13914 Otherwise if we do not use the link register we do not need to save
13915 it. If we are pushing other registers onto the stack however, we
13916 can save an instruction in the epilogue by pushing the link register
13917 now and then popping it back into the PC. This incurs extra memory
13918 accesses though, so we only do it when optimizing for size, and only
13919 if we know that we will not need a fancy return sequence. */
13920 if (df_regs_ever_live_p (LR_REGNUM)
13923 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13924 && !crtl->calls_eh_return))
13925 save_reg_mask |= 1 << LR_REGNUM;
13927 if (cfun->machine->lr_save_eliminated)
13928 save_reg_mask &= ~ (1 << LR_REGNUM);
13930 if (TARGET_REALLY_IWMMXT
13931 && ((bit_count (save_reg_mask)
13932 + ARM_NUM_INTS (crtl->args.pretend_args_size +
arm_compute_static_chain_stack_bytes ())
13936 /* The total number of registers that are going to be pushed
13937 onto the stack is odd. We need to ensure that the stack
13938 is 64-bit aligned before we start to save iWMMXt registers,
13939 and also before we start to create locals. (A local variable
13940 might be a double or long long which we will load/store using
13941 an iWMMXt instruction). Therefore we need to push another
13942 ARM register, so that the stack will be 64-bit aligned. We
try to avoid using the arg registers (r0 - r3) as they might be
13944 used to pass values in a tail call. */
13945 for (reg = 4; reg <= 12; reg++)
13946 if ((save_reg_mask & (1 << reg)) == 0)
13950 save_reg_mask |= (1 << reg);
13953 cfun->machine->sibcall_blocked = 1;
13954 save_reg_mask |= (1 << 3);
13958 /* We may need to push an additional register for use initializing the
13959 PIC base register. */
13960 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13961 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13963 reg = thumb_find_work_register (1 << 4);
13964 if (!call_used_regs[reg])
13965 save_reg_mask |= (1 << reg);
13968 return save_reg_mask;
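/* Illustrative only: when optimizing for size, a normal function that
   clobbers r4 and makes a call typically ends up with both r4 and LR in
   the mask, so the epilogue can use a single "pop {r4, pc}".  */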
13972 /* Compute a bit mask of which registers need to be
13973 saved on the stack for the current function. */
13974 static unsigned long
13975 thumb1_compute_save_reg_mask (void)
13977 unsigned long mask;
13981 for (reg = 0; reg < 12; reg ++)
13982 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13986 && !TARGET_SINGLE_PIC_BASE
13987 && arm_pic_register != INVALID_REGNUM
13988 && crtl->uses_pic_offset_table)
13989 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13991 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13992 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13993 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13995 /* LR will also be pushed if any lo regs are pushed. */
13996 if (mask & 0xff || thumb_force_lr_save ())
13997 mask |= (1 << LR_REGNUM);
13999 /* Make sure we have a low work register if we need one.
14000 We will need one if we are going to push a high register,
14001 but we are not currently intending to push a low register. */
14002 if ((mask & 0xff) == 0
14003 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14005 /* Use thumb_find_work_register to choose which register
14006 we will use. If the register is live then we will
14007 have to push it. Use LAST_LO_REGNUM as our fallback
14008 choice for the register to select. */
14009 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14010 /* Make sure the register returned by thumb_find_work_register is
14011 not part of the return value. */
14012 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14013 reg = LAST_LO_REGNUM;
14015 if (! call_used_regs[reg])
14019 /* The 504 below is 8 bytes less than 512 because there are two possible
14020 alignment words. We can't tell here if they will be present or not so we
14021 have to play it safe and assume that they are. */
  if ((CALLER_INTERWORKING_SLOT_SIZE
       + ROUND_UP_WORD (get_frame_size ())
       + crtl->outgoing_args_size) >= 504)
14026 /* This is the same as the code in thumb1_expand_prologue() which
14027 determines which register to use for stack decrement. */
14028 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14029 if (mask & (1 << reg))
14032 if (reg > LAST_LO_REGNUM)
14034 /* Make sure we have a register available for stack decrement. */
14035 mask |= 1 << LAST_LO_REGNUM;
14043 /* Return the number of bytes required to save VFP registers. */
14045 arm_get_vfp_saved_size (void)
14047 unsigned int regno;
14052 /* Space for saved VFP registers. */
14053 if (TARGET_HARD_FLOAT && TARGET_VFP)
14056 for (regno = FIRST_VFP_REGNUM;
14057 regno < LAST_VFP_REGNUM;
14060 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14061 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14065 /* Workaround ARM10 VFPr1 bug. */
14066 if (count == 2 && !arm_arch6)
14068 saved += count * 8;
14077 if (count == 2 && !arm_arch6)
14079 saved += count * 8;
14086 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14087 everything bar the final return instruction. */
14089 output_return_instruction (rtx operand, int really_return, int reverse)
14091 char conditional[10];
14094 unsigned long live_regs_mask;
14095 unsigned long func_type;
14096 arm_stack_offsets *offsets;
14098 func_type = arm_current_func_type ();
14100 if (IS_NAKED (func_type))
14103 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14105 /* If this function was declared non-returning, and we have
14106 found a tail call, then we have to trust that the called
14107 function won't return. */
14112 /* Otherwise, trap an attempted return by aborting. */
14114 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14116 assemble_external_libcall (ops[1]);
14117 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14123 gcc_assert (!cfun->calls_alloca || really_return);
14125 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14127 cfun->machine->return_used_this_function = 1;
14129 offsets = arm_get_frame_offsets ();
14130 live_regs_mask = offsets->saved_regs_mask;
14132 if (live_regs_mask)
14134 const char * return_reg;
14136 /* If we do not have any special requirements for function exit
14137 (e.g. interworking) then we can load the return address
14138 directly into the PC. Otherwise we must load it into LR. */
14140 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14141 return_reg = reg_names[PC_REGNUM];
14143 return_reg = reg_names[LR_REGNUM];
14145 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14147 /* There are three possible reasons for the IP register
14148 being saved. 1) a stack frame was created, in which case
14149 IP contains the old stack pointer, or 2) an ISR routine
14150 corrupted it, or 3) it was saved to align the stack on
iWMMXt.  In case 1, restore IP into SP, otherwise just
pop it.  */
14153 if (frame_pointer_needed)
14155 live_regs_mask &= ~ (1 << IP_REGNUM);
14156 live_regs_mask |= (1 << SP_REGNUM);
14159 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14162 /* On some ARM architectures it is faster to use LDR rather than
14163 LDM to load a single register. On other architectures, the
14164 cost is the same. In 26 bit mode, or for exception handlers,
we have to use LDM to load the PC so that the CPSR is also
restored.  */
14167 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14168 if (live_regs_mask == (1U << reg))
14171 if (reg <= LAST_ARM_REGNUM
14172 && (reg != LR_REGNUM
14174 || ! IS_INTERRUPT (func_type)))
14176 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14177 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14184 /* Generate the load multiple instruction to restore the
14185 registers. Note we can get here, even if
14186 frame_pointer_needed is true, but only if sp already
14187 points to the base of the saved core registers. */
14188 if (live_regs_mask & (1 << SP_REGNUM))
14190 unsigned HOST_WIDE_INT stack_adjust;
14192 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14193 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14195 if (stack_adjust && arm_arch5 && TARGET_ARM)
14196 if (TARGET_UNIFIED_ASM)
14197 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14199 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14202 /* If we can't use ldmib (SA110 bug),
14203 then try to pop r3 instead. */
14205 live_regs_mask |= 1 << 3;
14207 if (TARGET_UNIFIED_ASM)
14208 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14210 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14214 if (TARGET_UNIFIED_ASM)
14215 sprintf (instr, "pop%s\t{", conditional);
14217 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14219 p = instr + strlen (instr);
14221 for (reg = 0; reg <= SP_REGNUM; reg++)
14222 if (live_regs_mask & (1 << reg))
14224 int l = strlen (reg_names[reg]);
14230 memcpy (p, ", ", 2);
14234 memcpy (p, "%|", 2);
14235 memcpy (p + 2, reg_names[reg], l);
14239 if (live_regs_mask & (1 << LR_REGNUM))
14241 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14242 /* If returning from an interrupt, restore the CPSR. */
14243 if (IS_INTERRUPT (func_type))
14250 output_asm_insn (instr, & operand);
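	  /* Illustrative only: for a simple function whose live_regs_mask
	     covers r4, r5 and LR, the string built above is typically
	     "ldmfd sp!, {r4, r5, pc}" (or "pop {r4, r5, pc}" for unified
	     syntax), restoring and returning in one instruction.  */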
14252 /* See if we need to generate an extra instruction to
14253 perform the actual function return. */
14255 && func_type != ARM_FT_INTERWORKED
14256 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14258 /* The return has already been handled
14259 by loading the LR into the PC. */
14266 switch ((int) ARM_FUNC_TYPE (func_type))
14270 /* ??? This is wrong for unified assembly syntax. */
14271 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14274 case ARM_FT_INTERWORKED:
14275 sprintf (instr, "bx%s\t%%|lr", conditional);
14278 case ARM_FT_EXCEPTION:
14279 /* ??? This is wrong for unified assembly syntax. */
14280 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14284 /* Use bx if it's available. */
14285 if (arm_arch5 || arm_arch4t)
14286 sprintf (instr, "bx%s\t%%|lr", conditional);
14288 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14292 output_asm_insn (instr, & operand);
14298 /* Write the function name into the code section, directly preceding
14299 the function prologue.
   Code will be output similar to this:

     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4
14312 When performing a stack backtrace, code can inspect the value
14313 of 'pc' stored at 'fp' + 0. If the trace function then looks
14314 at location pc - 12 and the top 8 bits are set, then we know
14315 that there is a function name embedded immediately preceding this
location, whose length is (pc[-3] & ~0xff000000).
14318 We assume that pc is declared as a pointer to an unsigned long.
14320 It is of no benefit to output the function name if we are assembling
14321 a leaf function. These function types will not contain a stack
backtrace structure, therefore it is not possible to determine the
function name.  */
14325 arm_poke_function_name (FILE *stream, const char *name)
14327 unsigned long alignlength;
14328 unsigned long length;
14331 length = strlen (name) + 1;
14332 alignlength = ROUND_UP_WORD (length);
14334 ASM_OUTPUT_ASCII (stream, name, length);
14335 ASM_OUTPUT_ALIGN (stream, 2);
14336 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14337 assemble_aligned_integer (UNITS_PER_WORD, x);
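/* Illustrative only: arm_poke_function_name (stream, "foo") emits

	.ascii	"foo\000"
	.align	2
	.word	0xff000004

   since strlen ("foo") + 1 == 4, which is already word-aligned.  */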
14340 /* Place some comments into the assembler stream
14341 describing the current function. */
14343 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14345 unsigned long func_type;
14349 thumb1_output_function_prologue (f, frame_size);
14353 /* Sanity check. */
14354 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14356 func_type = arm_current_func_type ();
14358 switch ((int) ARM_FUNC_TYPE (func_type))
14361 case ARM_FT_NORMAL:
14363 case ARM_FT_INTERWORKED:
14364 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14367 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14370 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14372 case ARM_FT_EXCEPTION:
14373 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14377 if (IS_NAKED (func_type))
14378 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14380 if (IS_VOLATILE (func_type))
14381 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14383 if (IS_NESTED (func_type))
14384 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14385 if (IS_STACKALIGN (func_type))
14386 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14388 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14390 crtl->args.pretend_args_size, frame_size);
14392 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14393 frame_pointer_needed,
14394 cfun->machine->uses_anonymous_args);
14396 if (cfun->machine->lr_save_eliminated)
14397 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14399 if (crtl->calls_eh_return)
asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
14405 arm_output_epilogue (rtx sibling)
14408 unsigned long saved_regs_mask;
14409 unsigned long func_type;
14410 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14411 frame that is $fp + 4 for a non-variadic function. */
14412 int floats_offset = 0;
14414 FILE * f = asm_out_file;
14415 unsigned int lrm_count = 0;
14416 int really_return = (sibling == NULL);
14418 arm_stack_offsets *offsets;
14420 /* If we have already generated the return instruction
14421 then it is futile to generate anything else. */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
14426 func_type = arm_current_func_type ();
14428 if (IS_NAKED (func_type))
14429 /* Naked functions don't have epilogues. */
14432 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14436 /* A volatile function should never return. Call abort. */
14437 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14438 assemble_external_libcall (op);
14439 output_asm_insn ("bl\t%a0", &op);
14444 /* If we are throwing an exception, then we really must be doing a
14445 return, so we can't tail-call. */
14446 gcc_assert (!crtl->calls_eh_return || really_return);
14448 offsets = arm_get_frame_offsets ();
14449 saved_regs_mask = offsets->saved_regs_mask;
14452 lrm_count = bit_count (saved_regs_mask);
14454 floats_offset = offsets->saved_args;
14455 /* Compute how far away the floats will be. */
14456 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14457 if (saved_regs_mask & (1 << reg))
14458 floats_offset += 4;
14460 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14462 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14463 int vfp_offset = offsets->frame;
14465 if (TARGET_FPA_EMU2)
14467 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14468 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14470 floats_offset += 12;
14471 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14472 reg, FP_REGNUM, floats_offset - vfp_offset);
14477 start_reg = LAST_FPA_REGNUM;
14479 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14481 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14483 floats_offset += 12;
14485 /* We can't unstack more than four registers at once. */
14486 if (start_reg - reg == 3)
14488 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14489 reg, FP_REGNUM, floats_offset - vfp_offset);
14490 start_reg = reg - 1;
14495 if (reg != start_reg)
14496 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14497 reg + 1, start_reg - reg,
14498 FP_REGNUM, floats_offset - vfp_offset);
14499 start_reg = reg - 1;
14503 /* Just in case the last register checked also needs unstacking. */
14504 if (reg != start_reg)
14505 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14506 reg + 1, start_reg - reg,
14507 FP_REGNUM, floats_offset - vfp_offset);
14510 if (TARGET_HARD_FLOAT && TARGET_VFP)
14514 /* The fldmd insns do not have base+offset addressing
14515 modes, so we use IP to hold the address. */
14516 saved_size = arm_get_vfp_saved_size ();
14518 if (saved_size > 0)
14520 floats_offset += saved_size;
14521 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14522 FP_REGNUM, floats_offset - vfp_offset);
14524 start_reg = FIRST_VFP_REGNUM;
14525 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14527 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14528 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14530 if (start_reg != reg)
14531 vfp_output_fldmd (f, IP_REGNUM,
14532 (start_reg - FIRST_VFP_REGNUM) / 2,
14533 (reg - start_reg) / 2);
14534 start_reg = reg + 2;
14537 if (start_reg != reg)
14538 vfp_output_fldmd (f, IP_REGNUM,
14539 (start_reg - FIRST_VFP_REGNUM) / 2,
14540 (reg - start_reg) / 2);
14545 /* The frame pointer is guaranteed to be non-double-word aligned.
14546 This is because it is set to (old_stack_pointer - 4) and the
14547 old_stack_pointer was double word aligned. Thus the offset to
14548 the iWMMXt registers to be loaded must also be non-double-word
14549 sized, so that the resultant address *is* double-word aligned.
14550 We can ignore floats_offset since that was already included in
14551 the live_regs_mask. */
14552 lrm_count += (lrm_count % 2 ? 2 : 1);
14554 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14555 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14557 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14558 reg, FP_REGNUM, lrm_count * 4);
14563 /* saved_regs_mask should contain the IP, which at the time of stack
14564 frame generation actually contains the old stack pointer. So a
14565 quick way to unwind the stack is just pop the IP register directly
14566 into the stack pointer. */
14567 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14568 saved_regs_mask &= ~ (1 << IP_REGNUM);
14569 saved_regs_mask |= (1 << SP_REGNUM);
14571 /* There are two registers left in saved_regs_mask - LR and PC. We
14572 only need to restore the LR register (the return address), but to
14573 save time we can load it directly into the PC, unless we need a
14574 special function exit sequence, or we are not really returning. */
14576 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14577 && !crtl->calls_eh_return)
14578 /* Delete the LR from the register mask, so that the LR on
14579 the stack is loaded into the PC in the register mask. */
14580 saved_regs_mask &= ~ (1 << LR_REGNUM);
14582 saved_regs_mask &= ~ (1 << PC_REGNUM);
14584 /* We must use SP as the base register, because SP is one of the
14585 registers being restored. If an interrupt or page fault
14586 happens in the ldm instruction, the SP might or might not
14587 have been restored. That would be bad, as then SP will no
14588 longer indicate the safe area of stack, and we can get stack
14589 corruption. Using SP as the base register means that it will
14590 be reset correctly to the original value, should an interrupt
14591 occur. If the stack pointer already points at the right
14592 place, then omit the subtraction. */
14593 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14594 || cfun->calls_alloca)
14595 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14596 4 * bit_count (saved_regs_mask));
14597 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14599 if (IS_INTERRUPT (func_type))
14600 /* Interrupt handlers will have pushed the
14601 IP onto the stack, so restore it now. */
14602 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14606 /* This branch is executed for ARM mode (non-apcs frames) and
14607 Thumb-2 mode. Frame layout is essentially the same for those
14608 cases, except that in ARM mode frame pointer points to the
14609 first saved register, while in Thumb-2 mode the frame pointer points
14610 to the last saved register.
It is possible to make the frame pointer point to the last saved
register in both cases, and remove some conditionals below.
14614 That means that fp setup in prologue would be just "mov fp, sp"
14615 and sp restore in epilogue would be just "mov sp, fp", whereas
14616 now we have to use add/sub in those cases. However, the value
14617 of that would be marginal, as both mov and add/sub are 32-bit
14618 in ARM mode, and it would require extra conditionals
in arm_expand_prologue to distinguish the ARM-apcs-frame case
(where the frame pointer is required to point at the first register)
from ARM-non-apcs-frame.  Therefore, such a change is postponed
until a real need arises.  */
14623 unsigned HOST_WIDE_INT amount;
14625 /* Restore stack pointer if necessary. */
14626 if (TARGET_ARM && frame_pointer_needed)
14628 operands[0] = stack_pointer_rtx;
14629 operands[1] = hard_frame_pointer_rtx;
14631 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14632 output_add_immediate (operands);
14636 if (frame_pointer_needed)
14638 /* For Thumb-2 restore sp from the frame pointer.
Operand restrictions mean we have to increment FP, then copy
it to SP.  */
14641 amount = offsets->locals_base - offsets->saved_regs;
14642 operands[0] = hard_frame_pointer_rtx;
14646 unsigned long count;
14647 operands[0] = stack_pointer_rtx;
14648 amount = offsets->outgoing_args - offsets->saved_regs;
	  /* Pop call-clobbered registers if it avoids a
	     separate stack adjustment.  */
14651 count = offsets->saved_regs - offsets->saved_args;
14654 && !crtl->calls_eh_return
&& bit_count (saved_regs_mask) * 4 == count
14656 && !IS_INTERRUPT (func_type)
14657 && !crtl->tail_call_emit)
14659 unsigned long mask;
14660 /* Preserve return values, of any size. */
mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
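	      /* E.g. a DImode value returned in r0-r1 gives
		 (8 + 3) / 4 == 2, so the mask covers r0 and r1.  */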
14663 mask &= ~saved_regs_mask;
14665 while (bit_count (mask) * 4 > amount)
14667 while ((mask & (1 << reg)) == 0)
14669 mask &= ~(1 << reg);
	      if (bit_count (mask) * 4 == amount)
		{
14673 saved_regs_mask |= mask;
14680 operands[1] = operands[0];
14681 operands[2] = GEN_INT (amount);
14682 output_add_immediate (operands);
14684 if (frame_pointer_needed)
14685 asm_fprintf (f, "\tmov\t%r, %r\n",
14686 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14689 if (TARGET_FPA_EMU2)
14691 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14692 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14693 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14698 start_reg = FIRST_FPA_REGNUM;
14700 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14702 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14704 if (reg - start_reg == 3)
14706 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14707 start_reg, SP_REGNUM);
14708 start_reg = reg + 1;
14713 if (reg != start_reg)
14714 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14715 start_reg, reg - start_reg,
14718 start_reg = reg + 1;
14722 /* Just in case the last register checked also needs unstacking. */
14723 if (reg != start_reg)
14724 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14725 start_reg, reg - start_reg, SP_REGNUM);
14728 if (TARGET_HARD_FLOAT && TARGET_VFP)
14730 int end_reg = LAST_VFP_REGNUM + 1;
14732 /* Scan the registers in reverse order. We need to match
any groupings made in the prologue and generate matching
pops.  */
14735 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14737 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14738 && (!df_regs_ever_live_p (reg + 1)
14739 || call_used_regs[reg + 1]))
14741 if (end_reg > reg + 2)
14742 vfp_output_fldmd (f, SP_REGNUM,
14743 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14744 (end_reg - (reg + 2)) / 2);
14748 if (end_reg > reg + 2)
14749 vfp_output_fldmd (f, SP_REGNUM, 0,
14750 (end_reg - (reg + 2)) / 2);
14754 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14755 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14756 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14758 /* If we can, restore the LR into the PC. */
14759 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14760 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14761 && !IS_STACKALIGN (func_type)
14763 && crtl->args.pretend_args_size == 0
14764 && saved_regs_mask & (1 << LR_REGNUM)
14765 && !crtl->calls_eh_return)
14767 saved_regs_mask &= ~ (1 << LR_REGNUM);
14768 saved_regs_mask |= (1 << PC_REGNUM);
14769 rfe = IS_INTERRUPT (func_type);
14774 /* Load the registers off the stack. If we only have one register
14775 to load use the LDR instruction - it is faster. For Thumb-2
always use pop and the assembler will pick the best instruction.  */
14777 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
&& !IS_INTERRUPT (func_type))
14780 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14782 else if (saved_regs_mask)
14784 if (saved_regs_mask & (1 << SP_REGNUM))
14785 /* Note - write back to the stack register is not enabled
14786 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14787 in the list of registers and if we add writeback the
14788 instruction becomes UNPREDICTABLE. */
14789 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14791 else if (TARGET_ARM)
14792 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14795 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14798 if (crtl->args.pretend_args_size)
14800 /* Unwind the pre-pushed regs. */
14801 operands[0] = operands[1] = stack_pointer_rtx;
14802 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14803 output_add_immediate (operands);
14807 /* We may have already restored PC directly from the stack. */
14808 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14811 /* Stack adjustment for exception handler. */
14812 if (crtl->calls_eh_return)
14813 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14814 ARM_EH_STACKADJ_REGNUM);
14816 /* Generate the return instruction. */
14817 switch ((int) ARM_FUNC_TYPE (func_type))
14821 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14824 case ARM_FT_EXCEPTION:
14825 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14828 case ARM_FT_INTERWORKED:
14829 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14833 if (IS_STACKALIGN (func_type))
14835 /* See comment in arm_expand_prologue. */
14836 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14838 if (arm_arch5 || arm_arch4t)
14839 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14841 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14849 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14850 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14852 arm_stack_offsets *offsets;
14858 /* Emit any call-via-reg trampolines that are needed for v4t support
14859 of call_reg and call_value_reg type insns. */
14860 for (regno = 0; regno < LR_REGNUM; regno++)
14862 rtx label = cfun->machine->call_via[regno];
14866 switch_to_section (function_section (current_function_decl));
14867 targetm.asm_out.internal_label (asm_out_file, "L",
14868 CODE_LABEL_NUMBER (label));
14869 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14873 /* ??? Probably not safe to set this here, since it assumes that a
14874 function will be emitted as assembly immediately after we generate
14875 RTL for it. This does not happen for inline functions. */
14876 cfun->machine->return_used_this_function = 0;
14878 else /* TARGET_32BIT */
14880 /* We need to take into account any stack-frame rounding. */
14881 offsets = arm_get_frame_offsets ();
14883 gcc_assert (!use_return_insn (FALSE, NULL)
14884 || (cfun->machine->return_used_this_function != 0)
14885 || offsets->saved_regs == offsets->outgoing_args
14886 || frame_pointer_needed);
14888 /* Reset the ARM-specific per-function variables. */
14889 after_arm_reorg = 0;
14893 /* Generate and emit an insn that we will recognize as a push_multi.
14894 Unfortunately, since this insn does not reflect very well the actual
14895 semantics of the operation, we need to annotate the insn for the benefit
14896 of DWARF2 frame unwind information. */
14898 emit_multi_reg_push (unsigned long mask)
14901 int num_dwarf_regs;
14905 int dwarf_par_index;
14908 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14909 if (mask & (1 << i))
14912 gcc_assert (num_regs && num_regs <= 16);
14914 /* We don't record the PC in the dwarf frame information. */
14915 num_dwarf_regs = num_regs;
14916 if (mask & (1 << PC_REGNUM))
14919 /* For the body of the insn we are going to generate an UNSPEC in
14920 parallel with several USEs. This allows the insn to be recognized
14921 by the push_multi pattern in the arm.md file.
14923 The body of the insn looks something like this:
14926 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14927 (const_int:SI <num>)))
14928 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14934 For the frame note however, we try to be more explicit and actually
14935 show each register being stored into the stack frame, plus a (single)
14936 decrement of the stack pointer. We do it this way in order to be
14937 friendly to the stack unwinding code, which only wants to see a single
14938 stack decrement per instruction. The RTL we generate for the note looks
14939 something like this:
14942 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14943 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14944 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14945 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
FIXME: In an ideal world the PRE_MODIFY would not exist and
14950 instead we'd have a parallel expression detailing all
14951 the stores to the various memory addresses so that debug
14952 information is more up-to-date. Remember however while writing
14953 this to take care of the constraints with the push instruction.
14955 Note also that this has to be taken care of for the VFP registers.
14957 For more see PR43399. */
14959 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14960 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14961 dwarf_par_index = 1;
14963 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14965 if (mask & (1 << i))
14967 reg = gen_rtx_REG (SImode, i);
14969 XVECEXP (par, 0, 0)
14970 = gen_rtx_SET (VOIDmode,
14973 gen_rtx_PRE_MODIFY (Pmode,
14976 (stack_pointer_rtx,
14979 gen_rtx_UNSPEC (BLKmode,
14980 gen_rtvec (1, reg),
14981 UNSPEC_PUSH_MULT));
14983 if (i != PC_REGNUM)
14985 tmp = gen_rtx_SET (VOIDmode,
14986 gen_frame_mem (SImode, stack_pointer_rtx),
14988 RTX_FRAME_RELATED_P (tmp) = 1;
14989 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14997 for (j = 1, i++; j < num_regs; i++)
14999 if (mask & (1 << i))
15001 reg = gen_rtx_REG (SImode, i);
15003 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15005 if (i != PC_REGNUM)
15008 = gen_rtx_SET (VOIDmode,
15011 plus_constant (stack_pointer_rtx,
15014 RTX_FRAME_RELATED_P (tmp) = 1;
15015 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15022 par = emit_insn (par);
15024 tmp = gen_rtx_SET (VOIDmode,
15026 plus_constant (stack_pointer_rtx, -4 * num_regs));
15027 RTX_FRAME_RELATED_P (tmp) = 1;
15028 XVECEXP (dwarf, 0, 0) = tmp;
15030 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
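/* Illustrative only: emit_multi_reg_push for the mask
   (1 << 4) | (1 << 5) | (1 << LR_REGNUM) produces the insn assembled as
   "push {r4, r5, lr}", with a note describing one 12-byte SP decrement
   and the three individual stores for the unwinder.  */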
15035 /* Calculate the size of the return value that is passed in registers. */
15037 arm_size_return_regs (void)
15039 enum machine_mode mode;
15041 if (crtl->return_rtx != 0)
15042 mode = GET_MODE (crtl->return_rtx);
15044 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15046 return GET_MODE_SIZE (mode);
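/* E.g. a function returning long long has a DImode return rtx, so this
   yields 8: the value comes back in r0 and r1.  */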
15050 emit_sfm (int base_reg, int count)
15057 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15058 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15060 reg = gen_rtx_REG (XFmode, base_reg++);
15062 XVECEXP (par, 0, 0)
15063 = gen_rtx_SET (VOIDmode,
15066 gen_rtx_PRE_MODIFY (Pmode,
15069 (stack_pointer_rtx,
15072 gen_rtx_UNSPEC (BLKmode,
15073 gen_rtvec (1, reg),
15074 UNSPEC_PUSH_MULT));
15075 tmp = gen_rtx_SET (VOIDmode,
15076 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15077 RTX_FRAME_RELATED_P (tmp) = 1;
15078 XVECEXP (dwarf, 0, 1) = tmp;
15080 for (i = 1; i < count; i++)
15082 reg = gen_rtx_REG (XFmode, base_reg++);
15083 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15085 tmp = gen_rtx_SET (VOIDmode,
15086 gen_frame_mem (XFmode,
15087 plus_constant (stack_pointer_rtx,
15090 RTX_FRAME_RELATED_P (tmp) = 1;
15091 XVECEXP (dwarf, 0, i + 1) = tmp;
15094 tmp = gen_rtx_SET (VOIDmode,
15096 plus_constant (stack_pointer_rtx, -12 * count));
15098 RTX_FRAME_RELATED_P (tmp) = 1;
15099 XVECEXP (dwarf, 0, 0) = tmp;
15101 par = emit_insn (par);
15102 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15108 /* Return true if the current function needs to save/restore LR. */
15111 thumb_force_lr_save (void)
15113 return !cfun->machine->lr_save_eliminated
15114 && (!leaf_function_p ()
15115 || thumb_far_jump_used_p ()
15116 || df_regs_ever_live_p (LR_REGNUM));
15120 /* Return true if r3 is used by any of the tail call insns in the
15121 current function. */
15124 any_sibcall_uses_r3 (void)
15129 if (!crtl->tail_call_emit)
15131 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15132 if (e->flags & EDGE_SIBCALL)
15134 rtx call = BB_END (e->src);
15135 if (!CALL_P (call))
15136 call = prev_nonnote_nondebug_insn (call);
15137 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15138 if (find_regno_fusage (call, USE, 3))
15145 /* Compute the distance from register FROM to register TO.
15146 These can be the arg pointer (26), the soft frame pointer (25),
15147 the stack pointer (13) or the hard frame pointer (11).
15148 In thumb mode r7 is used as the soft frame pointer, if needed.
15149 Typical stack layout looks like this:
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                             ----
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                             ----
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                             ----
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                             ----
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                             ----
15179 For a given function some or all of these stack components
15180 may not be needed, giving rise to the possibility of
15181 eliminating some of the registers.
15183 The values returned by this function must reflect the behavior
15184 of arm_expand_prologue() and arm_compute_save_reg_mask().
15186 The sign of the number returned reflects the direction of stack
15187 growth, so the values are positive for all eliminations except
15188 from the soft frame pointer to the hard frame pointer.
SFP may point just inside the local variables block to ensure correct
alignment.  */
15194 /* Calculate stack offsets. These are used to calculate register elimination
15195 offsets and in prologue/epilogue code. Also calculates which registers
15196 should be saved. */
15198 static arm_stack_offsets *
15199 arm_get_frame_offsets (void)
15201 struct arm_stack_offsets *offsets;
15202 unsigned long func_type;
15206 HOST_WIDE_INT frame_size;
15209 offsets = &cfun->machine->stack_offsets;
15211 /* We need to know if we are a leaf function. Unfortunately, it
15212 is possible to be called after start_sequence has been called,
15213 which causes get_insns to return the insns for the sequence,
15214 not the function, which will cause leaf_function_p to return
15215 the incorrect result.
However, we only need to know about leaf functions once reload has
completed, and the
15218 frame size cannot be changed after that time, so we can safely
15219 use the cached value. */
15221 if (reload_completed)
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
15226 frame_size = ROUND_UP_WORD (get_frame_size ());
15228 leaf = leaf_function_p ();
15230 /* Space for variadic functions. */
15231 offsets->saved_args = crtl->args.pretend_args_size;
15233 /* In Thumb mode this is incorrect, but never used. */
15234 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
arm_compute_static_chain_stack_bytes ();
15239 unsigned int regno;
15241 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15242 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15243 saved = core_saved;
15245 /* We know that SP will be doubleword aligned on entry, and we must
15246 preserve that condition at any subroutine call. We also require the
15247 soft frame pointer to be doubleword aligned. */
15249 if (TARGET_REALLY_IWMMXT)
15251 /* Check for the call-saved iWMMXt registers. */
15252 for (regno = FIRST_IWMMXT_REGNUM;
15253 regno <= LAST_IWMMXT_REGNUM;
15255 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15259 func_type = arm_current_func_type ();
15260 if (! IS_VOLATILE (func_type))
15262 /* Space for saved FPA registers. */
15263 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15264 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15267 /* Space for saved VFP registers. */
15268 if (TARGET_HARD_FLOAT && TARGET_VFP)
15269 saved += arm_get_vfp_saved_size ();
15272 else /* TARGET_THUMB1 */
15274 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15275 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15276 saved = core_saved;
15277 if (TARGET_BACKTRACE)
15281 /* Saved registers include the stack frame. */
15282 offsets->saved_regs = offsets->saved_args + saved +
arm_compute_static_chain_stack_bytes ();
15284 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
   on the stack.  */
15287 if (leaf && frame_size == 0
15288 /* However if it calls alloca(), we have a dynamically allocated
15289 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15290 && ! cfun->calls_alloca)
15292 offsets->outgoing_args = offsets->soft_frame;
15293 offsets->locals_base = offsets->soft_frame;
15297 /* Ensure SFP has the correct alignment. */
15298 if (ARM_DOUBLEWORD_ALIGN
15299 && (offsets->soft_frame & 7))
15301 offsets->soft_frame += 4;
15302 /* Try to align stack by pushing an extra reg. Don't bother doing this
15303 when there is a stack frame as the alignment will be rolled into
15304 the normal stack adjustment. */
15305 if (frame_size + crtl->outgoing_args_size == 0)
15309 /* If it is safe to use r3, then do so. This sometimes
15310 generates better code on Thumb-2 by avoiding the need to
15311 use 32-bit push/pop instructions. */
15312 if (! any_sibcall_uses_r3 ()
15313 && arm_size_return_regs () <= 12
15314 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15319 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15321 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15330 offsets->saved_regs += 4;
15331 offsets->saved_regs_mask |= (1 << reg);
15336 offsets->locals_base = offsets->soft_frame + frame_size;
15337 offsets->outgoing_args = (offsets->locals_base
15338 + crtl->outgoing_args_size);
15340 if (ARM_DOUBLEWORD_ALIGN)
15342 /* Ensure SP remains doubleword aligned. */
15343 if (offsets->outgoing_args & 7)
15344 offsets->outgoing_args += 4;
15345 gcc_assert (!(offsets->outgoing_args & 7));
15352 /* Calculate the relative offsets for the different stack pointers. Positive
15353 offsets are in the direction of stack growth. */
15356 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15358 arm_stack_offsets *offsets;
15360 offsets = arm_get_frame_offsets ();
15362 /* OK, now we have enough information to compute the distances.
15363 There must be an entry in these switch tables for each pair
15364 of registers in ELIMINABLE_REGS, even if some of the entries
15365 seem to be redundant or useless. */
15368 case ARG_POINTER_REGNUM:
15371 case THUMB_HARD_FRAME_POINTER_REGNUM:
15374 case FRAME_POINTER_REGNUM:
15375 /* This is the reverse of the soft frame pointer
15376 to hard frame pointer elimination below. */
15377 return offsets->soft_frame - offsets->saved_args;
15379 case ARM_HARD_FRAME_POINTER_REGNUM:
15380 /* This is only non-zero in the case where the static chain register
15381 is stored above the frame. */
15382 return offsets->frame - offsets->saved_args - 4;
15384 case STACK_POINTER_REGNUM:
15385 /* If nothing has been pushed on the stack at all
15386 then this will return -4. This *is* correct! */
15387 return offsets->outgoing_args - (offsets->saved_args + 4);
15390 gcc_unreachable ();
15392 gcc_unreachable ();
15394 case FRAME_POINTER_REGNUM:
15397 case THUMB_HARD_FRAME_POINTER_REGNUM:
15400 case ARM_HARD_FRAME_POINTER_REGNUM:
15401 /* The hard frame pointer points to the top entry in the
stack frame.  The soft frame pointer points to the bottom entry
15403 in the stack frame. If there is no stack frame at all,
15404 then they are identical. */
15406 return offsets->frame - offsets->soft_frame;
15408 case STACK_POINTER_REGNUM:
15409 return offsets->outgoing_args - offsets->soft_frame;
15412 gcc_unreachable ();
15414 gcc_unreachable ();
15417 /* You cannot eliminate from the stack pointer.
15418 In theory you could eliminate from the hard frame
15419 pointer to the stack pointer, but this will never
15420 happen, since if a stack frame is not needed the
15421 hard frame pointer will never be used. */
15422 gcc_unreachable ();
15426 /* Given FROM and TO register numbers, say whether this elimination is
15427 allowed. Frame pointer elimination is automatically handled.
15429 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15430 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15431 pointer, we must eliminate FRAME_POINTER_REGNUM into
15432 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15433 ARG_POINTER_REGNUM. */
15436 arm_can_eliminate (const int from, const int to)
15438 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15439 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15440 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15441 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15445 /* Emit RTL to save coprocessor registers on function entry. Returns the
15446 number of bytes pushed. */
arm_save_coproc_regs (void)
15451 int saved_size = 0;
15453 unsigned start_reg;
15456 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15457 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15459 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15460 insn = gen_rtx_MEM (V2SImode, insn);
15461 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15462 RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
   function.  */
15468 if (TARGET_FPA_EMU2)
15470 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15471 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15473 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15474 insn = gen_rtx_MEM (XFmode, insn);
15475 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15476 RTX_FRAME_RELATED_P (insn) = 1;
15482 start_reg = LAST_FPA_REGNUM;
15484 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15486 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15488 if (start_reg - reg == 3)
15490 insn = emit_sfm (reg, 4);
15491 RTX_FRAME_RELATED_P (insn) = 1;
15493 start_reg = reg - 1;
15498 if (start_reg != reg)
15500 insn = emit_sfm (reg + 1, start_reg - reg);
15501 RTX_FRAME_RELATED_P (insn) = 1;
15502 saved_size += (start_reg - reg) * 12;
15504 start_reg = reg - 1;
15508 if (start_reg != reg)
15510 insn = emit_sfm (reg + 1, start_reg - reg);
15511 saved_size += (start_reg - reg) * 12;
15512 RTX_FRAME_RELATED_P (insn) = 1;
15515 if (TARGET_HARD_FLOAT && TARGET_VFP)
15517 start_reg = FIRST_VFP_REGNUM;
15519 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15521 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15522 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15524 if (start_reg != reg)
15525 saved_size += vfp_emit_fstmd (start_reg,
15526 (reg - start_reg) / 2);
15527 start_reg = reg + 2;
15530 if (start_reg != reg)
15531 saved_size += vfp_emit_fstmd (start_reg,
15532 (reg - start_reg) / 2);
15538 /* Set the Thumb frame pointer from the stack pointer. */
15541 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15543 HOST_WIDE_INT amount;
15546 amount = offsets->outgoing_args - offsets->locals_base;
15548 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15549 stack_pointer_rtx, GEN_INT (amount)));
15552 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15553 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15554 expects the first two operands to be the same. */
15557 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15559 hard_frame_pointer_rtx));
15563 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15564 hard_frame_pointer_rtx,
15565 stack_pointer_rtx));
15567 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15568 plus_constant (stack_pointer_rtx, amount));
15569 RTX_FRAME_RELATED_P (dwarf) = 1;
15570 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15573 RTX_FRAME_RELATED_P (insn) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
15579 arm_expand_prologue (void)
15584 unsigned long live_regs_mask;
15585 unsigned long func_type;
15587 int saved_pretend_args = 0;
15588 int saved_regs = 0;
15589 unsigned HOST_WIDE_INT args_to_push;
15590 arm_stack_offsets *offsets;
15592 func_type = arm_current_func_type ();
15594 /* Naked functions don't have prologues. */
15595 if (IS_NAKED (func_type))
/* Make a copy of crtl->args.pretend_args_size, as we may need to
   modify it locally.  */
15599 args_to_push = crtl->args.pretend_args_size;
/* Compute which registers we will have to save onto the stack.  */
15602 offsets = arm_get_frame_offsets ();
15603 live_regs_mask = offsets->saved_regs_mask;
15605 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15607 if (IS_STACKALIGN (func_type))
      /* Handle a word-aligned stack pointer.  We generate the following:

	     mov r0, sp
	     bic r1, r0, #7
	     mov sp, r1
	     <save and restore r0 in normal prologue/epilogue>
	     mov sp, r0
	     bx lr

15621 The unwinder doesn't need to know about the stack realignment.
15622 Just tell it we saved SP in r0. */
15623 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15625 r0 = gen_rtx_REG (SImode, 0);
15626 r1 = gen_rtx_REG (SImode, 1);
15627 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15628 compiler won't choke. */
15629 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15630 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15631 insn = gen_movsi (r0, stack_pointer_rtx);
15632 RTX_FRAME_RELATED_P (insn) = 1;
15633 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15635 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15636 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15639 /* For APCS frames, if IP register is clobbered
when creating the frame, save that register in a special
way.  */
15642 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15644 if (IS_INTERRUPT (func_type))
15646 /* Interrupt functions must not corrupt any registers.
Creating a frame pointer, however, corrupts the IP
15648 register, so we must push it first. */
15649 insn = emit_multi_reg_push (1 << IP_REGNUM);
15651 /* Do not set RTX_FRAME_RELATED_P on this insn.
15652 The dwarf stack unwinding code only wants to see one
15653 stack decrement per function, and this is not it. If
15654 this instruction is labeled as being part of the frame
15655 creation sequence then dwarf2out_frame_debug_expr will
15656 die when it encounters the assignment of IP to FP
15657 later on, since the use of SP here establishes SP as
15658 the CFA register and not IP.
15660 Anyway this instruction is not really part of the stack
15661 frame creation although it is part of the prologue. */
15663 else if (IS_NESTED (func_type))
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this, we need to find somewhere to store IP
15668 whilst the frame is being created. We try the following
15671 1. The last argument register.
15672 2. A slot on the stack above the frame. (This only
15673 works if the function is not a varargs function).
	     3. Register r3, after pushing the argument registers
		onto the stack.
15677 Note - we only need to tell the dwarf2 backend about the SP
15678 adjustment in the second variant; the static chain register
15679 doesn't need to be unwound, as it doesn't contain a value
15680 inherited from the caller. */
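	  /* As a sketch of the second variant above (illustrative, not
	     verbatim compiler output):

		str	ip, [sp, #-4]!	@ stash IP just above the frame
		...			@ create the frame
		ldr	ip, [fp, #4]	@ recover the static chain register

	     matching the PRE_DEC store below and the frame-pointer
	     relative reload performed once the frame pointer exists.  */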
15682 if (df_regs_ever_live_p (3) == false)
15683 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15684 else if (args_to_push == 0)
	      gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
15691 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15692 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15695 /* Just tell the dwarf backend that we adjusted SP. */
15696 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -4));
15699 RTX_FRAME_RELATED_P (insn) = 1;
15700 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15704 /* Store the args on the stack. */
15705 if (cfun->machine->uses_anonymous_args)
15706 insn = emit_multi_reg_push
	      ((0xf0 >> (args_to_push / 4)) & 0xf);
	  else
	    insn = emit_insn
	      (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15711 GEN_INT (- args_to_push)));
15713 RTX_FRAME_RELATED_P (insn) = 1;
15715 saved_pretend_args = 1;
15716 fp_offset = args_to_push;
15719 /* Now reuse r3 to preserve IP. */
15720 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15724 insn = emit_set_insn (ip_rtx,
15725 plus_constant (stack_pointer_rtx, fp_offset));
15726 RTX_FRAME_RELATED_P (insn) = 1;
15731 /* Push the argument registers, or reserve space for them. */
15732 if (cfun->machine->uses_anonymous_args)
15733 insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15738 GEN_INT (- args_to_push)));
15739 RTX_FRAME_RELATED_P (insn) = 1;
15742 /* If this is an interrupt service routine, and the link register
15743 is going to be pushed, and we're not generating extra
     push of IP (needed when the frame is needed and the frame layout is APCS),
15745 subtracting four from LR now will mean that the function return
15746 can be done with a single instruction. */
15747 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15748 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15749 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15752 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15754 emit_set_insn (lr, plus_constant (lr, -4));
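      /* Illustrative consequence (a sketch, not literal compiler output):
	 with LR pre-adjusted by -4, an IRQ handler can return with a
	 single "ldmfd sp!, {..., pc}^" instead of needing a separate
	 "subs pc, lr, #4".  */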
15757 if (live_regs_mask)
15759 saved_regs += bit_count (live_regs_mask) * 4;
15760 if (optimize_size && !frame_pointer_needed
15761 && saved_regs == offsets->saved_regs - offsets->saved_args)
15763 /* If no coprocessor registers are being pushed and we don't have
15764 to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
15766 alter the frame layout, so is independent of the epilogue. */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
15780 insn = emit_multi_reg_push (live_regs_mask);
15781 RTX_FRAME_RELATED_P (insn) = 1;
15784 if (! IS_VOLATILE (func_type))
15785 saved_regs += arm_save_coproc_regs ();
15787 if (frame_pointer_needed && TARGET_ARM)
15789 /* Create the new frame pointer. */
15790 if (TARGET_APCS_FRAME)
15792 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15793 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15794 RTX_FRAME_RELATED_P (insn) = 1;
15796 if (IS_NESTED (func_type))
15798 /* Recover the static chain register. */
15799 if (!df_regs_ever_live_p (3)
15800 || saved_pretend_args)
15801 insn = gen_rtx_REG (SImode, 3);
15802 else /* if (crtl->args.pretend_args_size == 0) */
15804 insn = plus_constant (hard_frame_pointer_rtx, 4);
15805 insn = gen_frame_mem (SImode, insn);
15807 emit_set_insn (ip_rtx, insn);
15808 /* Add a USE to stop propagate_one_insn() from barfing. */
15809 emit_insn (gen_prologue_use (ip_rtx));
15814 insn = GEN_INT (saved_regs - 4);
15815 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15816 stack_pointer_rtx, insn));
15817 RTX_FRAME_RELATED_P (insn) = 1;
15821 if (flag_stack_usage)
15822 current_function_static_stack_size
15823 = offsets->outgoing_args - offsets->saved_args;
15825 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15827 /* This add can produce multiple insns for a large constant, so we
15828 need to get tricky. */
15829 rtx last = get_last_insn ();
15831 amount = GEN_INT (offsets->saved_args + saved_regs
15832 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
15843 /* If the frame pointer is needed, emit a special barrier that
15844 will prevent the scheduler from moving stores to the frame
15845 before the stack adjustment. */
15846 if (frame_pointer_needed)
15847 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15848 hard_frame_pointer_rtx));
15852 if (frame_pointer_needed && TARGET_THUMB2)
15853 thumb_set_frame_pointer (offsets);
15855 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15857 unsigned long mask;
15859 mask = live_regs_mask;
15860 mask &= THUMB2_WORK_REGS;
15861 if (!IS_NESTED (func_type))
15862 mask |= (1 << IP_REGNUM);
15863 arm_load_pic_register (mask);
15866 /* If we are profiling, make sure no instructions are scheduled before
15867 the call to mcount. Similarly if the user has requested no
15868 scheduling in the prolog. Similarly if we want non-call exceptions
15869 using the EABI unwinder, to prevent faulting instructions from being
15870 swapped with a stack adjustment. */
15871 if (crtl->profile || !TARGET_SCHED_PROLOG
15872 || (arm_except_unwind_info (&global_options) == UI_TARGET
15873 && cfun->can_throw_non_call_exceptions))
15874 emit_insn (gen_blockage ());
15876 /* If the link register is being kept alive, with the return address in it,
15877 then make sure that it does not get reused by the ce2 pass. */
15878 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15879 cfun->machine->lr_save_eliminated = 1;
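/* As an illustrative sketch (not verbatim compiler output), a typical
   APCS-frame prologue built by arm_expand_prologue looks like:

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #NN		@ NN = locals + outgoing args

   with the register push marked RTX_FRAME_RELATED_P for the unwinder.  */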
15882 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
15886 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15888 /* Branch conversion is not implemented for Thumb-2. */
15891 output_operand_lossage ("predicated Thumb instruction");
15894 if (current_insn_predicate != NULL)
15896 output_operand_lossage
15897 ("predicated instruction in conditional sequence");
15901 fputs (arm_condition_codes[arm_current_cc], stream);
15903 else if (current_insn_predicate)
15905 enum arm_cond_code code;
15909 output_operand_lossage ("predicated Thumb instruction");
15913 code = get_arm_condition_code (current_insn_predicate);
15914 fputs (arm_condition_codes[code], stream);
15919 /* If CODE is 'd', then the X is a condition operand and the instruction
15920 should only be executed if the condition is true.
15921 if CODE is 'D', then the X is a condition operand and the instruction
15922 should only be executed if the condition is false: however, if the mode
15923 of the comparison is CCFPEmode, then always execute the instruction -- we
15924 do this because in these circumstances !GE does not necessarily imply LT;
15925 in these cases the instruction pattern will take care to make sure that
15926 an instruction containing %d will follow, thereby undoing the effects of
15927 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
15930 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15931 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
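/* For instance, an output template in arm.md might use these codes as
   follows (hypothetical templates, shown only to illustrate the
   expansions performed below):

     "mvn%?\t%0, #%B1"		@ %B prints the bitwise inverse of a constant
     "ldm%(ia%)\t%1, %M0"	@ %M prints a {rN-rM} multi-register range  */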
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
15938 fputs (ASM_COMMENT_START, stream);
15942 fputs (user_label_prefix, stream);
15946 fputs (REGISTER_PREFIX, stream);
15950 arm_print_condition (stream);
15954 /* Nothing in unified syntax, otherwise the current condition code. */
15955 if (!TARGET_UNIFIED_ASM)
15956 arm_print_condition (stream);
15960 /* The current condition code in unified syntax, otherwise nothing. */
15961 if (TARGET_UNIFIED_ASM)
15962 arm_print_condition (stream);
15966 /* The current condition code for a condition code setting instruction.
15967 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15968 if (TARGET_UNIFIED_ASM)
	  fputc ('s', stream);
15971 arm_print_condition (stream);
15975 arm_print_condition (stream);
	  fputc ('s', stream);
15981 /* If the instruction is conditionally executed then print
15982 the current condition code, otherwise print 's'. */
15983 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15984 if (current_insn_predicate)
15985 arm_print_condition (stream);
      else
	fputc ('s', stream);
15990 /* %# is a "break" sequence. It doesn't output anything, but is used to
15991 separate e.g. operand numbers from following text, if that text consists
	 of further digits which we don't want to be part of the operand
	 number.  */
16000 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16001 r = real_value_negate (&r);
16002 fprintf (stream, "%s", fp_const_from_val (&r));
16006 /* An integer or symbol address without a preceding # sign. */
16008 switch (GET_CODE (x))
16011 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16015 output_addr_const (stream, x);
16019 gcc_unreachable ();
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
16038 /* The low 16 bits of an immediate constant. */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
16043 fprintf (stream, "%s", arithmetic_instr (x, 1));
16046 /* Truncate Cirrus shift counts. */
16048 if (GET_CODE (x) == CONST_INT)
16050 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16053 arm_print_operand (stream, x, 0);
16057 fprintf (stream, "%s", arithmetic_instr (x, 0));
16065 if (!shift_operator (x, SImode))
16067 output_operand_lossage ("invalid shift operand");
16071 shift = shift_op (x, &val);
	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
16084 /* An explanation of the 'Q', 'R' and 'H' register operands:
16086 In a pair of registers containing a DI or DF value the 'Q'
16087 operand returns the register number of the register containing
16088 the least significant part of the value. The 'R' operand returns
16089 the register number of the register containing the most
16090 significant part of the value.
16092 The 'H' operand returns the higher of the two register numbers.
   On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
16094 same as the 'Q' operand, since the most significant part of the
16095 value is held in the lower number register. The reverse is true
16096 on systems where WORDS_BIG_ENDIAN is false.
16098 The purpose of these operands is to distinguish between cases
16099 where the endian-ness of the values is important (for example
16100 when they are added together), and cases where the endian-ness
16101 is irrelevant, but the order of register operations is important.
16102 For example when loading a value from memory into a register
16103 pair, the endian-ness does not matter. Provided that the value
16104 from the lower memory address is put into the lower numbered
16105 register, and the value from the higher address is put into the
16106 higher numbered register, the load will work regardless of whether
16107 the value being loaded is big-wordian or little-wordian. The
16108 order of the two register loads can matter however, if the address
16109 of the memory location is actually held in one of the registers
16110 being overwritten by the load.
   The 'Q' and 'R' constraints are also available for 64-bit
   constants.  */
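/* For example, the 64-bit add template in arm.md is written as

     adds	%Q0, %Q1, %Q2
     adc	%R0, %R1, %R2

   so the low words are added first and the carry propagates into the
   high words, independent of word endianness.  */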
16115 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16117 rtx part = gen_lowpart (SImode, x);
16118 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16122 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16124 output_operand_lossage ("invalid operand for code '%c'", code);
16128 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16132 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16134 enum machine_mode mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
16140 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16144 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16146 output_operand_lossage ("invalid operand for code '%c'", code);
16150 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16154 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16156 output_operand_lossage ("invalid operand for code '%c'", code);
16160 asm_fprintf (stream, "%r", REGNO (x) + 1);
16164 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16166 output_operand_lossage ("invalid operand for code '%c'", code);
16170 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16174 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16176 output_operand_lossage ("invalid operand for code '%c'", code);
16180 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16184 asm_fprintf (stream, "%r",
16185 GET_CODE (XEXP (x, 0)) == REG
16186 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16195 /* Like 'M', but writing doubleword vector registers, for use by Neon
16199 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16200 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16209 /* CONST_TRUE_RTX means always -- that's the default. */
16210 if (x == const_true_rtx)
16213 if (!COMPARISON_P (x))
16215 output_operand_lossage ("invalid operand for code '%c'", code);
16219 fputs (arm_condition_codes[get_arm_condition_code (x)],
16224 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16225 want to do that. */
16226 if (x == const_true_rtx)
16228 output_operand_lossage ("instruction never executed");
16231 if (!COMPARISON_P (x))
16233 output_operand_lossage ("invalid operand for code '%c'", code);
16237 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16238 (get_arm_condition_code (x))],
16242 /* Cirrus registers can be accessed in a variety of ways:
16243 single floating point (f)
16244 double floating point (d)
16246 64bit integer (dx). */
16247 case 'W': /* Cirrus register in F mode. */
16248 case 'X': /* Cirrus register in D mode. */
16249 case 'Y': /* Cirrus register in FX mode. */
16250 case 'Z': /* Cirrus register in DX mode. */
16251 gcc_assert (GET_CODE (x) == REG
16252 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
      fprintf (stream, "mv%s%s",
	       code == 'W' ? "f"
	       : code == 'X' ? "d"
	       : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16261 /* Print cirrus register in the mode specified by the register's mode. */
16264 int mode = GET_MODE (x);
16266 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16268 output_operand_lossage ("invalid operand for code '%c'", code);
16272 fprintf (stream, "mv%s%s",
16273 mode == DFmode ? "d"
16274 : mode == SImode ? "fx"
16275 : mode == DImode ? "dx"
16276 : "f", reg_names[REGNO (x)] + 2);
16282 if (GET_CODE (x) != REG
16283 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16284 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16285 /* Bad value for wCG register number. */
16287 output_operand_lossage ("invalid operand for code '%c'", code);
16292 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16295 /* Print an iWMMXt control register name. */
      if (GET_CODE (x) != CONST_INT
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
16300 /* Bad value for wC register number. */
16302 output_operand_lossage ("invalid operand for code '%c'", code);
16308 static const char * wc_reg_names [16] =
16310 "wCID", "wCon", "wCSSF", "wCASF",
16311 "wC4", "wC5", "wC6", "wC7",
16312 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16313 "wC12", "wC13", "wC14", "wC15"
	fputs (wc_reg_names[INTVAL (x)], stream);
16320 /* Print the high single-precision register of a VFP double-precision
16324 int mode = GET_MODE (x);
16327 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16329 output_operand_lossage ("invalid operand for code '%c'", code);
16334 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16336 output_operand_lossage ("invalid operand for code '%c'", code);
16340 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16344 /* Print a VFP/Neon double precision or quad precision register name. */
16348 int mode = GET_MODE (x);
16349 int is_quad = (code == 'q');
16352 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16354 output_operand_lossage ("invalid operand for code '%c'", code);
16358 if (GET_CODE (x) != REG
16359 || !IS_VFP_REGNUM (REGNO (x)))
16361 output_operand_lossage ("invalid operand for code '%c'", code);
16366 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16367 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16369 output_operand_lossage ("invalid operand for code '%c'", code);
16373 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16374 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16378 /* These two codes print the low/high doubleword register of a Neon quad
16379 register, respectively. For pair-structure types, can also print
16380 low/high quadword registers. */
16384 int mode = GET_MODE (x);
16387 if ((GET_MODE_SIZE (mode) != 16
16388 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16390 output_operand_lossage ("invalid operand for code '%c'", code);
16395 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16397 output_operand_lossage ("invalid operand for code '%c'", code);
16401 if (GET_MODE_SIZE (mode) == 16)
16402 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16403 + (code == 'f' ? 1 : 0));
16405 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16406 + (code == 'f' ? 1 : 0));
16410 /* Print a VFPv3 floating-point constant, represented as an integer
16414 int index = vfp3_const_double_index (x);
16415 gcc_assert (index != -1);
16416 fprintf (stream, "%d", index);
16420 /* Print bits representing opcode features for Neon.
16422 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16423 and polynomials as unsigned.
16425 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16427 Bit 2 is 1 for rounding functions, 0 otherwise. */
16429 /* Identify the type as 's', 'u', 'p' or 'f'. */
16432 HOST_WIDE_INT bits = INTVAL (x);
16433 fputc ("uspf"[bits & 3], stream);
16437 /* Likewise, but signed and unsigned integers are both 'i'. */
16440 HOST_WIDE_INT bits = INTVAL (x);
16441 fputc ("iipf"[bits & 3], stream);
16445 /* As for 'T', but emit 'u' instead of 'p'. */
16448 HOST_WIDE_INT bits = INTVAL (x);
16449 fputc ("usuf"[bits & 3], stream);
16453 /* Bit 2: rounding (vs none). */
16456 HOST_WIDE_INT bits = INTVAL (x);
16457 fputs ((bits & 4) != 0 ? "r" : "", stream);
16461 /* Memory operand for vld1/vst1 instruction. */
16465 bool postinc = FALSE;
16466 unsigned align, modesize, align_bits;
16468 gcc_assert (GET_CODE (x) == MEM);
16469 addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
16475 asm_fprintf (stream, "[%r", REGNO (addr));
16477 /* We know the alignment of this access, so we can emit a hint in the
16478 instruction (for some alignments) as an aid to the memory subsystem
16480 align = MEM_ALIGN (x) >> 3;
16481 modesize = GET_MODE_SIZE (GET_MODE (x));
16483 /* Only certain alignment specifiers are supported by the hardware. */
	if (modesize == 16 && (align % 32) == 0)
	  align_bits = 256;
	else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
	  align_bits = 128;
	else if ((align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;
16493 if (align_bits != 0)
16494 asm_fprintf (stream, ":%d", align_bits);
16496 asm_fprintf (stream, "]");
	if (postinc)
	  fputs ("!", stream);
16507 gcc_assert (GET_CODE (x) == MEM);
16508 addr = XEXP (x, 0);
16509 gcc_assert (GET_CODE (addr) == REG);
16510 asm_fprintf (stream, "[%r]", REGNO (addr));
16514 /* Translate an S register number into a D register number and element index. */
16517 int mode = GET_MODE (x);
16520 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16522 output_operand_lossage ("invalid operand for code '%c'", code);
16527 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16529 output_operand_lossage ("invalid operand for code '%c'", code);
16533 regno = regno - FIRST_VFP_REGNUM;
16534 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16538 /* Register specifier for vld1.16/vst1.16. Translate the S register
16539 number into a D register number and element index. */
16542 int mode = GET_MODE (x);
16545 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16547 output_operand_lossage ("invalid operand for code '%c'", code);
16552 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16554 output_operand_lossage ("invalid operand for code '%c'", code);
16558 regno = regno - FIRST_VFP_REGNUM;
16559 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16566 output_operand_lossage ("missing operand");
16570 switch (GET_CODE (x))
16573 asm_fprintf (stream, "%r", REGNO (x));
16577 output_memory_reference_mode = GET_MODE (x);
16578 output_address (XEXP (x, 0));
16585 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16586 sizeof (fpstr), 0, 1);
16587 fprintf (stream, "#%s", fpstr);
16590 fprintf (stream, "#%s", fp_immediate_constant (x));
16594 gcc_assert (GET_CODE (x) != NEG);
16595 fputc ('#', stream);
16596 if (GET_CODE (x) == HIGH)
16598 fputs (":lower16:", stream);
16602 output_addr_const (stream, x);
16608 /* Target hook for printing a memory address. */
16610 arm_print_operand_address (FILE *stream, rtx x)
16614 int is_minus = GET_CODE (x) == MINUS;
16616 if (GET_CODE (x) == REG)
16617 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16618 else if (GET_CODE (x) == PLUS || is_minus)
16620 rtx base = XEXP (x, 0);
16621 rtx index = XEXP (x, 1);
16622 HOST_WIDE_INT offset = 0;
16623 if (GET_CODE (base) != REG
16624 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16626 /* Ensure that BASE is a register. */
16627 /* (one of them must be). */
	  /* Also ensure the SP is not used as an index register.  */
16633 switch (GET_CODE (index))
16636 offset = INTVAL (index);
16639 asm_fprintf (stream, "[%r, #%wd]",
16640 REGNO (base), offset);
16644 asm_fprintf (stream, "[%r, %s%r]",
16645 REGNO (base), is_minus ? "-" : "",
16655 asm_fprintf (stream, "[%r, %s%r",
16656 REGNO (base), is_minus ? "-" : "",
16657 REGNO (XEXP (index, 0)));
16658 arm_print_operand (stream, index, 'S');
16659 fputs ("]", stream);
16664 gcc_unreachable ();
16667 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16668 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16670 extern enum machine_mode output_memory_reference_mode;
16672 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16674 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16675 asm_fprintf (stream, "[%r, #%s%d]!",
16676 REGNO (XEXP (x, 0)),
16677 GET_CODE (x) == PRE_DEC ? "-" : "",
16678 GET_MODE_SIZE (output_memory_reference_mode));
16680 asm_fprintf (stream, "[%r], #%s%d",
16681 REGNO (XEXP (x, 0)),
16682 GET_CODE (x) == POST_DEC ? "-" : "",
16683 GET_MODE_SIZE (output_memory_reference_mode));
16685 else if (GET_CODE (x) == PRE_MODIFY)
16687 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16688 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16689 asm_fprintf (stream, "#%wd]!",
16690 INTVAL (XEXP (XEXP (x, 1), 1)));
16692 asm_fprintf (stream, "%r]!",
16693 REGNO (XEXP (XEXP (x, 1), 1)));
16695 else if (GET_CODE (x) == POST_MODIFY)
16697 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16698 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16699 asm_fprintf (stream, "#%wd",
16700 INTVAL (XEXP (XEXP (x, 1), 1)));
16702 asm_fprintf (stream, "%r",
16703 REGNO (XEXP (XEXP (x, 1), 1)));
  else
    output_addr_const (stream, x);
16709 if (GET_CODE (x) == REG)
16710 asm_fprintf (stream, "[%r]", REGNO (x));
16711 else if (GET_CODE (x) == POST_INC)
16712 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16713 else if (GET_CODE (x) == PLUS)
16715 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16716 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16717 asm_fprintf (stream, "[%r, #%wd]",
16718 REGNO (XEXP (x, 0)),
16719 INTVAL (XEXP (x, 1)));
16721 asm_fprintf (stream, "[%r, %r]",
16722 REGNO (XEXP (x, 0)),
16723 REGNO (XEXP (x, 1)));
16726 output_addr_const (stream, x);
16730 /* Target hook for indicating whether a punctuation character for
16731 TARGET_PRINT_OPERAND is valid. */
16733 arm_print_operand_punct_valid_p (unsigned char code)
16735 return (code == '@' || code == '|' || code == '.'
16736 || code == '(' || code == ')' || code == '#'
16737 || (TARGET_32BIT && (code == '?'))
16738 || (TARGET_THUMB2 && (code == '!'))
16739 || (TARGET_THUMB && (code == '_')));
16742 /* Target hook for assembling integer objects. The ARM version needs to
16743 handle word-sized values specially. */
16745 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16747 enum machine_mode mode;
16749 if (size == UNITS_PER_WORD && aligned_p)
16751 fputs ("\t.word\t", asm_out_file);
16752 output_addr_const (asm_out_file, x);
16754 /* Mark symbols as position independent. We only do this in the
16755 .text segment, not in the .data segment. */
      if (NEED_GOT_RELOC && flag_pic && making_const_table
	  && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16759 /* See legitimize_pic_address for an explanation of the
16760 TARGET_VXWORKS_RTP check. */
16761 if (TARGET_VXWORKS_RTP
16762 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16763 fputs ("(GOT)", asm_out_file);
16765 fputs ("(GOTOFF)", asm_out_file);
16767 fputc ('\n', asm_out_file);
16771 mode = GET_MODE (x);
16773 if (arm_vector_mode_supported_p (mode))
16777 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16779 units = CONST_VECTOR_NUNITS (x);
16780 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16782 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16783 for (i = 0; i < units; i++)
16785 rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16790 for (i = 0; i < units; i++)
16792 rtx elt = CONST_VECTOR_ELT (x, i);
16793 REAL_VALUE_TYPE rval;
16795 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
	    assemble_real
	      (rval, GET_MODE_INNER (mode),
16799 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16805 return default_assemble_integer (x, size, aligned_p);
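/* Illustrative output of the word-sized case above (a sketch, not
   verbatim): a constant-pool reference to an external symbol "foo"
   under -fpic is emitted as

	.word	foo(GOT)

   while a function-local symbol or label gets "(GOTOFF)" instead.  */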
16809 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }
16821 /* Put these in the .init_array section, using a special relocation. */
16822 if (priority != DEFAULT_INIT_PRIORITY)
16825 sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
16828 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16835 switch_to_section (s);
16836 assemble_align (POINTER_SIZE);
16837 fputs ("\t.word\t", asm_out_file);
16838 output_addr_const (asm_out_file, symbol);
16839 fputs ("(target1)\n", asm_out_file);
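/* For instance (an illustrative sketch), a constructor "my_ctor" with
   priority 123 lands in section ".init_array.00123" and is emitted as

	.word	my_ctor(target1)

   where (target1) yields an R_ARM_TARGET1 relocation, which the linker
   resolves as either absolute or relative depending on configuration.  */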
16842 /* Add a function to the list of static constructors. */
16845 arm_elf_asm_constructor (rtx symbol, int priority)
16847 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16850 /* Add a function to the list of static destructors. */
16853 arm_elf_asm_destructor (rtx symbol, int priority)
16855 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16858 /* A finite state machine takes care of noticing whether or not instructions
16859 can be conditionally executed, and thus decrease execution time and code
16860 size by deleting branch instructions. The fsm is controlled by
16861 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16863 /* The state of the fsm controlling condition codes are:
16864 0: normal, do nothing special
16865 1: make ASM_OUTPUT_OPCODE not output this instruction
16866 2: make ASM_OUTPUT_OPCODE not output this instruction
16867 3: make instructions conditional
16868 4: make instructions conditional
16870 State transitions (state->state by whom under condition):
16871 0 -> 1 final_prescan_insn if the `target' is a label
16872 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16873 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16874 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16875 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16876 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16877 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16878 (the target insn is arm_target_insn).
16880 If the jump clobbers the conditions then we use states 2 and 4.
16882 A similar thing can be done with conditional return insns.
16884 XXX In case the `target' is an unconditional branch, this conditionalising
16885 of the instructions always reduces code size, but not always execution
16886 time. But then, I want to reduce the code size to somewhere near what
16887 /bin/cc produces. */
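/* As a sketch of the transformation performed here (illustrative, not
   literal compiler output):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		=>	movne	r1, #1
	mov	r1, #1
     .L1:

   The conditional branch is suppressed and the instruction it skipped
   is predicated on the inverse condition.  */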
16889 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16890 instructions. When a COND_EXEC instruction is seen the subsequent
16891 instructions are scanned so that multiple conditional instructions can be
16892 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16893 specify the length and true/false mask for the IT block. These will be
16894 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
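/* For example (illustrative output), two adjacent COND_EXEC insns with
   opposite conditions can be merged into a single IT block:

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   Here arm_condexec_masklen is 2 and arm_condexec_mask is 0b01: bit 0
   ('t') matches arm_current_cc (EQ), bit 1 ('e') is its inverse.  */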
16896 /* Returns the index of the ARM condition code string in
16897 `arm_condition_codes'. COMPARISON should be an rtx like
16898 `(eq (...) (...))'. */
16899 static enum arm_cond_code
16900 get_arm_condition_code (rtx comparison)
16902 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16903 enum arm_cond_code code;
16904 enum rtx_code comp_code = GET_CODE (comparison);
16906 if (GET_MODE_CLASS (mode) != MODE_CC)
16907 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16908 XEXP (comparison, 1));
16912 case CC_DNEmode: code = ARM_NE; goto dominance;
16913 case CC_DEQmode: code = ARM_EQ; goto dominance;
16914 case CC_DGEmode: code = ARM_GE; goto dominance;
16915 case CC_DGTmode: code = ARM_GT; goto dominance;
16916 case CC_DLEmode: code = ARM_LE; goto dominance;
16917 case CC_DLTmode: code = ARM_LT; goto dominance;
16918 case CC_DGEUmode: code = ARM_CS; goto dominance;
16919 case CC_DGTUmode: code = ARM_HI; goto dominance;
16920 case CC_DLEUmode: code = ARM_LS; goto dominance;
16921 case CC_DLTUmode: code = ARM_CC;
16924 gcc_assert (comp_code == EQ || comp_code == NE);
16926 if (comp_code == EQ)
16927 return ARM_INVERSE_CONDITION_CODE (code);
16933 case NE: return ARM_NE;
16934 case EQ: return ARM_EQ;
16935 case GE: return ARM_PL;
16936 case LT: return ARM_MI;
16937 default: gcc_unreachable ();
16943 case NE: return ARM_NE;
16944 case EQ: return ARM_EQ;
16945 default: gcc_unreachable ();
16951 case NE: return ARM_MI;
16952 case EQ: return ARM_PL;
16953 default: gcc_unreachable ();
16958 /* These encodings assume that AC=1 in the FPA system control
16959 byte. This allows us to handle all cases except UNEQ and
16963 case GE: return ARM_GE;
16964 case GT: return ARM_GT;
16965 case LE: return ARM_LS;
16966 case LT: return ARM_MI;
16967 case NE: return ARM_NE;
16968 case EQ: return ARM_EQ;
16969 case ORDERED: return ARM_VC;
16970 case UNORDERED: return ARM_VS;
16971 case UNLT: return ARM_LT;
16972 case UNLE: return ARM_LE;
16973 case UNGT: return ARM_HI;
16974 case UNGE: return ARM_PL;
16975 /* UNEQ and LTGT do not have a representation. */
16976 case UNEQ: /* Fall through. */
16977 case LTGT: /* Fall through. */
16978 default: gcc_unreachable ();
16984 case NE: return ARM_NE;
16985 case EQ: return ARM_EQ;
16986 case GE: return ARM_LE;
16987 case GT: return ARM_LT;
16988 case LE: return ARM_GE;
16989 case LT: return ARM_GT;
16990 case GEU: return ARM_LS;
16991 case GTU: return ARM_CC;
16992 case LEU: return ARM_CS;
16993 case LTU: return ARM_HI;
16994 default: gcc_unreachable ();
17000 case LTU: return ARM_CS;
17001 case GEU: return ARM_CC;
17002 default: gcc_unreachable ();
17008 case NE: return ARM_NE;
17009 case EQ: return ARM_EQ;
17010 case GEU: return ARM_CS;
17011 case GTU: return ARM_HI;
17012 case LEU: return ARM_LS;
17013 case LTU: return ARM_CC;
17014 default: gcc_unreachable ();
17020 case GE: return ARM_GE;
17021 case LT: return ARM_LT;
17022 case GEU: return ARM_CS;
17023 case LTU: return ARM_CC;
17024 default: gcc_unreachable ();
17030 case NE: return ARM_NE;
17031 case EQ: return ARM_EQ;
17032 case GE: return ARM_GE;
17033 case GT: return ARM_GT;
17034 case LE: return ARM_LE;
17035 case LT: return ARM_LT;
17036 case GEU: return ARM_CS;
17037 case GTU: return ARM_HI;
17038 case LEU: return ARM_LS;
17039 case LTU: return ARM_CC;
17040 default: gcc_unreachable ();
17043 default: gcc_unreachable ();
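/* For example, (ne (reg:CC_Z ...) (const_int 0)) yields ARM_NE, while
   the same NE comparison in CC_Nmode yields ARM_MI, since that mode
   records only the sign bit of the result.  (A sketch; the CC modes
   themselves are defined in arm-modes.def.)  */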
17047 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17050 thumb2_final_prescan_insn (rtx insn)
17052 rtx first_insn = insn;
17053 rtx body = PATTERN (insn);
17055 enum arm_cond_code code;
17059 /* Remove the previous insn from the count of insns to be output. */
17060 if (arm_condexec_count)
17061 arm_condexec_count--;
17063 /* Nothing to do if we are already inside a conditional block. */
17064 if (arm_condexec_count)
  if (GET_CODE (body) != COND_EXEC)
    return;
17070 /* Conditional jumps are implemented directly. */
  if (GET_CODE (insn) == JUMP_INSN)
    return;
17074 predicate = COND_EXEC_TEST (body);
17075 arm_current_cc = get_arm_condition_code (predicate);
17077 n = get_attr_ce_count (insn);
17078 arm_condexec_count = 1;
17079 arm_condexec_mask = (1 << n) - 1;
17080 arm_condexec_masklen = n;
17081 /* See if subsequent instructions can be combined into the same block. */
17084 insn = next_nonnote_insn (insn);
17086 /* Jumping into the middle of an IT block is illegal, so a label or
17087 barrier terminates the block. */
      if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	break;
17091 body = PATTERN (insn);
17092 /* USE and CLOBBER aren't really insns, so just skip them. */
17093 if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;
17097 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
      if (GET_CODE (body) != COND_EXEC)
	break;
17100 /* Allow up to 4 conditionally executed instructions in a block. */
17101 n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
	break;
17105 predicate = COND_EXEC_TEST (body);
17106 code = get_arm_condition_code (predicate);
17107 mask = (1 << n) - 1;
17108 if (arm_current_cc == code)
17109 arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;
17113 arm_condexec_count++;
17114 arm_condexec_masklen += n;
17116 /* A jump must be the last instruction in a conditional block. */
      if (GET_CODE (insn) == JUMP_INSN)
	break;
17120 /* Restore recog_data (getting the attributes of other insns can
17121 destroy this array, but final.c assumes that it remains intact
17122 across this call). */
17123 extract_constrain_insn_cached (first_insn);
17127 arm_final_prescan_insn (rtx insn)
17129 /* BODY will hold the body of INSN. */
17130 rtx body = PATTERN (insn);
17132 /* This will be 1 if trying to repeat the trick, and things need to be
17133 reversed if it appears to fail. */
17136 /* If we start with a return insn, we only succeed if we find another one. */
17137 int seeking_return = 0;
17139 /* START_INSN will hold the insn from where we start looking. This is the
17140 first insn after the following code_label if REVERSE is true. */
17141 rtx start_insn = insn;
17143 /* If in state 4, check if the target branch is reached, in order to
17144 change back to state 0. */
17145 if (arm_ccfsm_state == 4)
17147 if (insn == arm_target_insn)
17149 arm_target_insn = NULL;
17150 arm_ccfsm_state = 0;
17155 /* If in state 3, it is possible to repeat the trick, if this insn is an
17156 unconditional branch to a label, and immediately following this branch
17157 is the previous target label which is only used once, and the label this
17158 branch jumps to is not too far off. */
17159 if (arm_ccfsm_state == 3)
17161 if (simplejump_p (insn))
17163 start_insn = next_nonnote_insn (start_insn);
17164 if (GET_CODE (start_insn) == BARRIER)
17166 /* XXX Isn't this always a barrier? */
17167 start_insn = next_nonnote_insn (start_insn);
17169 if (GET_CODE (start_insn) == CODE_LABEL
17170 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17171 && LABEL_NUSES (start_insn) == 1)
17176 else if (GET_CODE (body) == RETURN)
17178 start_insn = next_nonnote_insn (start_insn);
17179 if (GET_CODE (start_insn) == BARRIER)
17180 start_insn = next_nonnote_insn (start_insn);
17181 if (GET_CODE (start_insn) == CODE_LABEL
17182 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17183 && LABEL_NUSES (start_insn) == 1)
17186 seeking_return = 1;
17195 gcc_assert (!arm_ccfsm_state || reverse);
17196 if (GET_CODE (insn) != JUMP_INSN)
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
17201 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17202 body = XVECEXP (body, 0, 0);
  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17206 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17209 int fail = FALSE, succeed = FALSE;
17210 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17211 int then_not_else = TRUE;
17212 rtx this_insn = start_insn, label = 0;
17214 /* Register the insn jumped to. */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
17220 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17221 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17222 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17224 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17225 then_not_else = FALSE;
17227 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17228 seeking_return = 1;
17229 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17231 seeking_return = 1;
17232 then_not_else = FALSE;
17235 gcc_unreachable ();
17237 /* See how many insns this branch skips, and what kind of insns. If all
17238 insns are okay, and the label or unconditional branch to the same
17239 label is not too far away, succeed. */
17240 for (insns_skipped = 0;
17241 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17245 this_insn = next_nonnote_insn (this_insn);
17249 switch (GET_CODE (this_insn))
17252 /* Succeed if it is the target label, otherwise fail since
17253 control falls in from somewhere else. */
17254 if (this_insn == label)
17256 arm_ccfsm_state = 1;
17264 /* Succeed if the following insn is the target label.
17266 If return insns are used then the last insn in a function
17267 will be a barrier. */
17268 this_insn = next_nonnote_insn (this_insn);
17269 if (this_insn && this_insn == label)
17271 arm_ccfsm_state = 1;
17279 /* The AAPCS says that conditional calls should not be
17280 used since they make interworking inefficient (the
17281 linker can't transform BL<cond> into BLX). That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}
17289 /* Succeed if the following insn is the target label, or
	     if the following two insns are a barrier and the
	     target label.  */
17292 this_insn = next_nonnote_insn (this_insn);
17293 if (this_insn && GET_CODE (this_insn) == BARRIER)
17294 this_insn = next_nonnote_insn (this_insn);
17296 if (this_insn && this_insn == label
17297 && insns_skipped < max_insns_skipped)
17299 arm_ccfsm_state = 1;
17307 /* If this is an unconditional branch to the same label, succeed.
17308 If it is to another label, do nothing. If it is conditional,
	  /* XXX Probably, the tests for SET and the PC are
	     unnecessary.  */
17313 scanbody = PATTERN (this_insn);
17314 if (GET_CODE (scanbody) == SET
17315 && GET_CODE (SET_DEST (scanbody)) == PC)
17317 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17318 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17320 arm_ccfsm_state = 2;
17323 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17326 /* Fail if a conditional return is undesirable (e.g. on a
17327 StrongARM), but still allow this if optimizing for size. */
17328 else if (GET_CODE (scanbody) == RETURN
17329 && !use_return_insn (TRUE, NULL)
17332 else if (GET_CODE (scanbody) == RETURN
17335 arm_ccfsm_state = 2;
17338 else if (GET_CODE (scanbody) == PARALLEL)
17340 switch (get_attr_conds (this_insn))
17350 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17355 /* Instructions using or affecting the condition codes make it
17357 scanbody = PATTERN (this_insn);
17358 if (!(GET_CODE (scanbody) == SET
17359 || GET_CODE (scanbody) == PARALLEL)
17360 || get_attr_conds (this_insn) != CONDS_NOCOND)
17363 /* A conditional cirrus instruction must be followed by
17364 a non Cirrus instruction. However, since we
17365 conditionalize instructions in this function and by
17366 the time we get here we can't add instructions
17367 (nops), because shorten_branches() has already been
17368 called, we will disable conditionalizing Cirrus
17369 instructions to be safe. */
17370 if (GET_CODE (scanbody) != USE
17371 && GET_CODE (scanbody) != CLOBBER
17372 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17382 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17383 arm_target_label = CODE_LABEL_NUMBER (label);
17386 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17388 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17390 this_insn = next_nonnote_insn (this_insn);
17391 gcc_assert (!this_insn
17392 || (GET_CODE (this_insn) != BARRIER
17393 && GET_CODE (this_insn) != CODE_LABEL));
	      /* Oh dear!  We ran off the end.  Give up.  */
17398 extract_constrain_insn_cached (insn);
17399 arm_ccfsm_state = 0;
17400 arm_target_insn = NULL;
17403 arm_target_insn = this_insn;
17406 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17409 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17411 if (reverse || then_not_else)
17412 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17415 /* Restore recog_data (getting the attributes of other insns can
17416 destroy this array, but final.c assumes that it remains intact
     across this call).  */
17418 extract_constrain_insn_cached (insn);
17422 /* Output IT instructions. */
17424 thumb2_asm_output_opcode (FILE * stream)
17429 if (arm_condexec_mask)
17431 for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
17435 arm_condition_codes[arm_current_cc]);
17436 arm_condexec_mask = 0;
17440 /* Returns true if REGNO is a valid register
17441 for holding a quantity of type MODE. */
17443 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17445 if (GET_MODE_CLASS (mode) == MODE_CC)
17446 return (regno == CC_REGNUM
17447 || (TARGET_HARD_FLOAT && TARGET_VFP
17448 && regno == VFPCC_REGNUM));
17451 /* For the Thumb we only allow values bigger than SImode in
17452 registers 0 - 6, so that there is always a second low
17453 register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
17455 start of an even numbered register pair. */
17456 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17458 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17459 && IS_CIRRUS_REGNUM (regno))
17460 /* We have outlawed SI values in Cirrus registers because they
17461 reside in the lower 32 bits, but SF values reside in the
17462 upper 32 bits. This causes gcc all sorts of grief. We can't
17463 even split the registers into pairs because Cirrus SI values
       get sign-extended to 64 bits -- aldyh.  */
17465 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17467 if (TARGET_HARD_FLOAT && TARGET_VFP
17468 && IS_VFP_REGNUM (regno))
17470 if (mode == SFmode || mode == SImode)
17471 return VFP_REGNO_OK_FOR_SINGLE (regno);
17473 if (mode == DFmode)
17474 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17476 /* VFP registers can hold HFmode values, but there is no point in
17477 putting them there unless we have hardware conversion insns. */
17478 if (mode == HFmode)
17479 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17482 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17483 || (VALID_NEON_QREG_MODE (mode)
17484 && NEON_REGNO_OK_FOR_QUAD (regno))
17485 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17486 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17487 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17488 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17489 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17494 if (TARGET_REALLY_IWMMXT)
17496 if (IS_IWMMXT_GR_REGNUM (regno))
17497 return mode == SImode;
17499 if (IS_IWMMXT_REGNUM (regno))
17500 return VALID_IWMMXT_REG_MODE (mode);
17503 /* We allow almost any value to be stored in the general registers.
17504 Restrict doubleword quantities to even register pairs so that we can
17505 use ldrd. Do not allow very large Neon structure opaque modes in
17506 general registers; they would use too many. */
17507 if (regno <= LAST_ARM_REGNUM)
17508 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17509 && ARM_NUM_REGS (mode) <= 4;
17511 if (regno == FRAME_POINTER_REGNUM
17512 || regno == ARG_POINTER_REGNUM)
17513 /* We only allow integers in the fake hard registers. */
17514 return GET_MODE_CLASS (mode) == MODE_INT;
17516 /* The only registers left are the FPA registers
17517 which we only allow to hold FP values. */
17518 return (TARGET_HARD_FLOAT && TARGET_FPA
17519 && GET_MODE_CLASS (mode) == MODE_FLOAT
17520 && regno >= FIRST_FPA_REGNUM
17521 && regno <= LAST_FPA_REGNUM);
17524 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17525 not used in arm mode. */
17528 arm_regno_class (int regno)
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;
17544 if ( regno <= LAST_ARM_REGNUM
17545 || regno == FRAME_POINTER_REGNUM
17546 || regno == ARG_POINTER_REGNUM)
17547 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17549 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17550 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17552 if (IS_CIRRUS_REGNUM (regno))
17553 return CIRRUS_REGS;
17555 if (IS_VFP_REGNUM (regno))
17557 if (regno <= D7_VFP_REGNUM)
17558 return VFP_D0_D7_REGS;
17559 else if (regno <= LAST_LO_VFP_REGNUM)
17560 return VFP_LO_REGS;
17562 return VFP_HI_REGS;
17565 if (IS_IWMMXT_REGNUM (regno))
17566 return IWMMXT_REGS;
17568 if (IS_IWMMXT_GR_REGNUM (regno))
17569 return IWMMXT_GR_REGS;
17574 /* Handle a special case when computing the offset
17575 of an argument from the frame pointer. */
17577 arm_debugger_arg_offset (int value, rtx addr)
  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;
17585 /* We can only cope with the case where the address is held in a register. */
  if (GET_CODE (addr) != REG)
    return 0;
17589 /* If we are using the frame pointer to point at the argument, then
17590 an offset of 0 is correct. */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;
17594 /* If we are using the stack pointer to point at the
17595 argument, then an offset of 0 is correct. */
17596 /* ??? Check this is consistent with thumb2 frame layout. */
17597 if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;
17601 /* Oh dear. The argument is pointed to by a register rather
17602 than being held in a register, or being stored at a known
17603 offset from the frame pointer. Since GDB only understands
17604 those two kinds of argument we must translate the address
17605 held in the register into an offset from the frame pointer.
17606 We do this by searching through the insns for the function
17607 looking to see where this register gets its value. If the
17608 register is initialized from the frame pointer plus an offset
17609 then we are in luck and we can continue, otherwise we give up.
17611 This code is exercised by producing debugging information
17612 for a function with arguments like this:
17614 double func (double a, double b, int c, double d) {return d;}
17616 Without this code the stab for parameter 'd' will be set to
17617 an offset of 0 from the frame pointer, rather than 8. */
17619 /* The if() statement says:
17621 If the insn is a normal instruction
17622 and if the insn is setting the value in a register
17623 and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
17631 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17633 if ( GET_CODE (insn) == INSN
17634 && GET_CODE (PATTERN (insn)) == SET
17635 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17636 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17637 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17638 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17639 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
	  )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
#define def_mbuiltin(MASK, NAME, TYPE, CODE)			\
  do								\
    {								\
      if ((MASK) & insn_flags)					\
	add_builtin_function ((NAME), (TYPE), (CODE),		\
			      BUILT_IN_MD, NULL, NULL_TREE);	\
    }								\
  while (0)
struct builtin_description
{
17669 const unsigned int mask;
17670 const enum insn_code icode;
17671 const char * const name;
17672 const enum arm_builtins code;
17673 const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_2arg[] =
{
17679 #define IWMMXT_BUILTIN(code, string, builtin) \
17680 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17681 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
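/* For example, the first entry below,

     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)

   expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   so the builtin is registered only when FL_IWMMXT is set in insn_flags.  */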
17683 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17684 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17685 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17686 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17687 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17688 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17689 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17690 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17691 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17692 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17693 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17694 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17695 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17696 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17697 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17698 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17699 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17700 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17701 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17702 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17703 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17704 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17705 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17706 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17707 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17708 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17709 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17710 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17711 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17712 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17713 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17714 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17715 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17716 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17717 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17718 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17719 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17720 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17721 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17722 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17723 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17724 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17725 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17726 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17727 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17728 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17729 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17730 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17731 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17732 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17733 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17734 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17735 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17736 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17737 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17738 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17739 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17740 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17742 #define IWMMXT_BUILTIN2(code, builtin) \
17743 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17745 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17746 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17747 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17748 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17749 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17750 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17751 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17752 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17753 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17754 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17755 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17756 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17757 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17758 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17759 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17760 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17761 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17762 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17763 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17764 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17765 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17766 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17767 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17768 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17769 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17770 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17771 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17772 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17773 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17774 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17775 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
17781 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17782 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17783 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17784 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17785 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17786 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17787 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17788 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17789 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17790 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17791 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17792 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17793 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17794 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17795 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17796 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17797 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
17801 /* Set up all the iWMMXt builtins. This is
17802 not called if TARGET_IWMMXT is zero. */
17805 arm_init_iwmmxt_builtins (void)
17807 const struct builtin_description * d;
17809 tree endlink = void_list_node;
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree int_ftype_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE,
                                      long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            V4HI_type_node,
                                                            endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
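/* Illustrative usage sketch (not compiled here; assumes an iWMMXt-enabled
   target and GCC's generic vector extension for the argument type):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     shift_halfwords (v4hi x)
     {
       return __builtin_arm_wsllhi (x, 3);   -- v4hi_ftype_v4hi_int above

     }

   Each __builtin_arm_* name registered by def_mbuiltin carries an
   ARM_BUILTIN_* function code that arm_expand_builtin later maps back to
   the matching iwmmxt insn pattern.  */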
static void
arm_init_tls_builtins (void)
{
  tree ftype;
  tree decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
                               ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
                               "__builtin_thread_pointer", NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
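/* Usage sketch (illustrative only): user code can read the thread pointer
   directly,

     void *
     current_thread (void)
     {
       return __builtin_thread_pointer ();
     }

   and because the decl is marked nothrow and readonly above, repeated
   calls within a function can be CSEd.  */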
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP
#define T_MAX 13

typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X
#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
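/* For clarity, an expansion sketch of one table entry below:

     { VAR2 (BINOP, vqdmull, v4hi, v2si) }

   expands (via UP and CF above) to

     { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 }

   i.e. a name, an itype classifying the operand pattern, a bitmask of the
   supported variant modes, the matching insn codes, the variant count, and
   a base function code that arm_init_neon_builtins fills in later.  */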
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
          vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
          vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};

#undef CF
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10

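/* Naming sketch: arm_init_neon_builtins below walks this table and, for
   each variant bit set in an entry, registers "__builtin_neon_<name><mode>".
   The first entry therefore yields __builtin_neon_vaddv8qi up through
   __builtin_neon_vaddv2di, each with a distinct function code starting at
   ARM_BUILTIN_NEON_BASE.  User code normally reaches these through the
   arm_neon.h intrinsics (e.g. vadd_s8) rather than by name.  */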
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];

  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
                                             "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                             "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                             "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
                                             "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
                                             "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
                                             "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
                                             "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
                                           TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
                                           TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
                                           TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
                                           TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
                                           TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);

  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);

  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
                                             "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
                                             "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
                                             "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
                                             "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
                                             "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
                                             "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
                                             "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
                              V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
                              V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
                              V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
                              V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
                              V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
                              V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
                              V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
                              V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
                              V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      int j;

      for (j = 0; j < 5; j++)
        {
          reinterp_ftype_dreg[i][j]
            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
          reinterp_ftype_qreg[i][j]
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
        }
    }

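  /* Illustrative note: the 5x5 matrices just built give, for instance,
     reinterp_ftype_dreg[0][4], a function type taking a 64-bit scalar and
     returning a v8qi vector (dreg_types[0] is V8QI_type_node and
     dreg_types[4] is neon_intDI_type_node).  The NEON_REINTERP case below
     indexes these matrices by destination mode and by j % 5.  */
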
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
        {
          const char* const modenames[] = {
            "v8qi", "v4hi", "v2si", "v2sf", "di",
            "v16qi", "v8hi", "v4si", "v4sf", "v2di"
          };
          char namebuf[60];
          tree ftype = NULL;
          enum insn_code icode;
          int is_load = 0, is_store = 0;

          if ((d->bits & (1 << j)) == 0)
            continue;

          icode = d->codes[codeidx++];

          switch (d->itype)
            {
            case NEON_LOAD1:
            case NEON_LOAD1LANE:
            case NEON_LOADSTRUCT:
            case NEON_LOADSTRUCTLANE:
              is_load = 1;
              /* Fall through.  */
            case NEON_STORE1:
            case NEON_STORE1LANE:
            case NEON_STORESTRUCT:
            case NEON_STORESTRUCTLANE:
              if (!is_load)
                is_store = 1;
              /* Fall through.  */
            case NEON_UNOP:
            case NEON_BINOP:
            case NEON_LOGICBINOP:
            case NEON_SHIFTINSERT:
            case NEON_TERNOP:
            case NEON_GETLANE:
            case NEON_SETLANE:
            case NEON_CREATE:
            case NEON_DUP:
            case NEON_DUPLANE:
            case NEON_SHIFTIMM:
            case NEON_SHIFTACC:
            case NEON_COMBINE:
            case NEON_SPLIT:
            case NEON_CONVERT:
            case NEON_FIXCONV:
            case NEON_LANEMUL:
            case NEON_LANEMULL:
            case NEON_LANEMULH:
            case NEON_LANEMAC:
            case NEON_SCALARMUL:
            case NEON_SCALARMULL:
            case NEON_SCALARMULH:
            case NEON_SCALARMAC:
            case NEON_SELECT:
            case NEON_VTBL:
            case NEON_VTBX:
              {
                int k;
                tree return_type = void_type_node, args = void_list_node;

                /* Build a function type directly from the insn_data for this
                   builtin.  The build_function_type() function takes care of
                   removing duplicates for us.  */
                for (k = insn_data[icode].n_generator_args - 1; k >= 0; k--)
                  {
                    tree eltype;

                    if (is_load && k == 1)
                      {
                        /* Neon load patterns always have the memory operand
                           (a SImode pointer) in the operand 1 position.  We
                           want a const pointer to the element type in that
                           position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = const_intQI_pointer_node;
                            break;
                          case T_V4HI:
                          case T_V8HI:
                            eltype = const_intHI_pointer_node;
                            break;
                          case T_V2SI:
                          case T_V4SI:
                            eltype = const_intSI_pointer_node;
                            break;
                          case T_V2SF:
                          case T_V4SF:
                            eltype = const_float_pointer_node;
                            break;
                          case T_DI:
                          case T_V2DI:
                            eltype = const_intDI_pointer_node;
                            break;
                          default: gcc_unreachable ();
                          }
                      }
                    else if (is_store && k == 0)
                      {
                        /* Similarly, Neon store patterns use operand 0 as
                           the memory location to store to (a SImode pointer).
                           Use a pointer to the element type of the store in
                           that position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = intQI_pointer_node;
                            break;
                          case T_V4HI:
                          case T_V8HI:
                            eltype = intHI_pointer_node;
                            break;
                          case T_V2SI:
                          case T_V4SI:
                            eltype = intSI_pointer_node;
                            break;
                          case T_V2SF:
                          case T_V4SF:
                            eltype = float_pointer_node;
                            break;
                          case T_DI:
                          case T_V2DI:
                            eltype = intDI_pointer_node;
                            break;
                          default: gcc_unreachable ();
                          }
                      }
                    else
                      {
                        switch (insn_data[icode].operand[k].mode)
                          {
                          case VOIDmode: eltype = void_type_node; break;
                          /* Scalars.  */
                          case QImode: eltype = neon_intQI_type_node; break;
                          case HImode: eltype = neon_intHI_type_node; break;
                          case SImode: eltype = neon_intSI_type_node; break;
                          case SFmode: eltype = neon_float_type_node; break;
                          case DImode: eltype = neon_intDI_type_node; break;
                          case TImode: eltype = intTI_type_node; break;
                          case EImode: eltype = intEI_type_node; break;
                          case OImode: eltype = intOI_type_node; break;
                          case CImode: eltype = intCI_type_node; break;
                          case XImode: eltype = intXI_type_node; break;
                          /* 64-bit vectors.  */
                          case V8QImode: eltype = V8QI_type_node; break;
                          case V4HImode: eltype = V4HI_type_node; break;
                          case V2SImode: eltype = V2SI_type_node; break;
                          case V2SFmode: eltype = V2SF_type_node; break;
                          /* 128-bit vectors.  */
                          case V16QImode: eltype = V16QI_type_node; break;
                          case V8HImode: eltype = V8HI_type_node; break;
                          case V4SImode: eltype = V4SI_type_node; break;
                          case V4SFmode: eltype = V4SF_type_node; break;
                          case V2DImode: eltype = V2DI_type_node; break;
                          default: gcc_unreachable ();
                          }
                      }

                    if (k == 0 && !is_store)
                      return_type = eltype;
                    else
                      args = tree_cons (NULL_TREE, eltype, args);
                  }

                ftype = build_function_type (return_type, args);
              }
              break;

            case NEON_RESULTPAIR:
              {
                switch (insn_data[icode].operand[1].mode)
                  {
                  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
                  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
                  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
                  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
                  case DImode: ftype = void_ftype_pdi_di_di; break;
                  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
                  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
                  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
                  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
                  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
                  default: gcc_unreachable ();
                  }
              }
              break;

            case NEON_REINTERP:
              {
                /* We iterate over 5 doubleword types, then 5 quadword
                   types.  */
                int rhs = j % 5;
                switch (insn_data[icode].operand[0].mode)
                  {
                  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
                  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
                  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
                  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
                  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
                  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
                  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
                  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
                  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
                  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
                  default: gcc_unreachable ();
                  }
              }
              break;

            default:
              gcc_unreachable ();
            }

          gcc_assert (ftype != NULL);

          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

          add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
                                NULL_TREE);
        }
    }
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
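/* Usage sketch (illustrative; assumes arm_fp16_format is set, e.g. via
   -mfp16-format=ieee, so the type above is registered):

     __fp16
     scale (__fp16 x)
     {
       return x * 2.0f;   -- x promotes to float, result converts back

     }

   The promotion behaviour is implemented by arm_promoted_type below.  */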
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
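/* Taken together, the three hooks above give __fp16 its storage-only
   semantics.  A sketch:

     __fp16 h (__fp16 x);   -- rejected: both parameter and return type

     __fp16 a, b;
     float f = a + b;       -- OK: both operands promote to float first

   so arithmetic never happens directly in half precision.  */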
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
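/* Conversion sketch: for 'double d = h;' with __fp16 h, the hook rewrites
   the implicit conversion as (double)(float)h, and 'h = d;' likewise
   becomes h = (__fp16)(float)d.  Conversions to or from 32-bit float fall
   outside the precision test above, so the hook returns NULL_TREE and the
   default machinery handles them directly.  */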
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
                               : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
                         tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}

static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key
    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
  neon_builtin_datum *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
             sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
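/* Lookup sketch: each table entry owns the contiguous function-code range
   [base_fcode, base_fcode + num_vars).  So if, say, vadd was assigned base
   function code F with all 10 variants, locate_neon_builtin_icode (F + 7, &it)
   bsearches to the vadd entry, sets it to NEON_BINOP and returns codes[7],
   the v4si variant given the modename ordering above.  */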
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
                      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
        break;
      else
        {
          arg[argc] = CALL_EXPR_ARG (exp, argc);
          op[argc] = expand_normal (arg[argc]);
          mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

          switch (thisarg)
            {
            case NEON_ARG_COPY_TO_REG:
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                  (op[argc], mode[argc]))
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
              break;

            case NEON_ARG_CONSTANT:
              /* FIXME: This error message is somewhat unhelpful.  */
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                  (op[argc], mode[argc]))
                error ("argument must be a constant");
              break;

            case NEON_ARG_STOP:
              gcc_unreachable ();
            }

          argc++;
        }
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (target, op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (target, op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}

/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype = (neon_itype) 0;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    default:
      gcc_unreachable ();
    }
}

/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
                            rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
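/* Layout sketch: for a DImode pair, the two results land at destaddr and
   destaddr + 8 (GET_MODE_SIZE (DImode)), i.e. the caller passes the address
   of a two-element buffer.  This is what the NEON_RESULTPAIR expansions
   (vtrn/vzip/vuzp) rely on, via the void (pointer, vector, vector) function
   types built in arm_init_neon_builtins.  */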
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
        if ((copied & (1 << i)) == 0)
          {
            int good = TRUE;

            for (j = 0; good && j < count; j++)
              if (i != j && (copied & (1 << j)) == 0
                  && reg_overlap_mentioned_p (src[j], dest[i]))
                good = FALSE;

            if (good)
              {
                operands[opctr++] = dest[i];
                operands[opctr++] = src[i];
                copied |= 1 << i;
              }
          }
    }

  gcc_assert (opctr == count * 2);
}
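/* Worked example (illustrative): with count == 2, dest = {d0, d1} and
   src = {d1, d2}, copying d1 <- d2 first would clobber the d1 still needed
   by d0 <- d1.  The sweep defers element 1 (src[0], which is d1, overlaps
   dest[1]) and emits d0 <- d1; a second sweep then emits d1 <- d2, so the
   result is operands[] = { d0, d1, d1, d2 } in a safe order.  */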
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
19516 case ARM_BUILTIN_TEXTRMSB:
19517 case ARM_BUILTIN_TEXTRMUB:
19518 case ARM_BUILTIN_TEXTRMSH:
19519 case ARM_BUILTIN_TEXTRMUH:
19520 case ARM_BUILTIN_TEXTRMSW:
19521 case ARM_BUILTIN_TEXTRMUW:
19522 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19523 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19524 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19525 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19526 : CODE_FOR_iwmmxt_textrmw);
19528 arg0 = CALL_EXPR_ARG (exp, 0);
19529 arg1 = CALL_EXPR_ARG (exp, 1);
19530 op0 = expand_normal (arg0);
19531 op1 = expand_normal (arg1);
19532 tmode = insn_data[icode].operand[0].mode;
19533 mode0 = insn_data[icode].operand[1].mode;
19534 mode1 = insn_data[icode].operand[2].mode;
19536 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19537 op0 = copy_to_mode_reg (mode0, op0);
19538 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19540 /* @@@ better error message */
19541 error ("selector must be an immediate");
19542 return gen_reg_rtx (tmode);
  if (target == 0
      || GET_MODE (target) != tmode
19546 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19547 target = gen_reg_rtx (tmode);
19548 pat = GEN_FCN (icode) (target, op0, op1);
19554 case ARM_BUILTIN_TINSRB:
19555 case ARM_BUILTIN_TINSRH:
19556 case ARM_BUILTIN_TINSRW:
19557 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19558 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19559 : CODE_FOR_iwmmxt_tinsrw);
19560 arg0 = CALL_EXPR_ARG (exp, 0);
19561 arg1 = CALL_EXPR_ARG (exp, 1);
19562 arg2 = CALL_EXPR_ARG (exp, 2);
19563 op0 = expand_normal (arg0);
19564 op1 = expand_normal (arg1);
19565 op2 = expand_normal (arg2);
19566 tmode = insn_data[icode].operand[0].mode;
19567 mode0 = insn_data[icode].operand[1].mode;
19568 mode1 = insn_data[icode].operand[2].mode;
19569 mode2 = insn_data[icode].operand[3].mode;
19571 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19572 op0 = copy_to_mode_reg (mode0, op0);
19573 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19574 op1 = copy_to_mode_reg (mode1, op1);
19575 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19577 /* @@@ better error message */
19578 error ("selector must be an immediate");
  if (target == 0
      || GET_MODE (target) != tmode
19583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19584 target = gen_reg_rtx (tmode);
19585 pat = GEN_FCN (icode) (target, op0, op1, op2);
19591 case ARM_BUILTIN_SETWCX:
19592 arg0 = CALL_EXPR_ARG (exp, 0);
19593 arg1 = CALL_EXPR_ARG (exp, 1);
19594 op0 = force_reg (SImode, expand_normal (arg0));
19595 op1 = expand_normal (arg1);
19596 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19599 case ARM_BUILTIN_GETWCX:
19600 arg0 = CALL_EXPR_ARG (exp, 0);
19601 op0 = expand_normal (arg0);
19602 target = gen_reg_rtx (SImode);
19603 emit_insn (gen_iwmmxt_tmrc (target, op0));
19606 case ARM_BUILTIN_WSHUFH:
19607 icode = CODE_FOR_iwmmxt_wshufh;
19608 arg0 = CALL_EXPR_ARG (exp, 0);
19609 arg1 = CALL_EXPR_ARG (exp, 1);
19610 op0 = expand_normal (arg0);
19611 op1 = expand_normal (arg1);
19612 tmode = insn_data[icode].operand[0].mode;
19613 mode1 = insn_data[icode].operand[1].mode;
19614 mode2 = insn_data[icode].operand[2].mode;
19616 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19617 op0 = copy_to_mode_reg (mode1, op0);
19618 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19620 /* @@@ better error message */
19621 error ("mask must be an immediate");
  if (target == 0
      || GET_MODE (target) != tmode
19626 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19627 target = gen_reg_rtx (tmode);
19628 pat = GEN_FCN (icode) (target, op0, op1);
19634 case ARM_BUILTIN_WSADB:
19635 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19636 case ARM_BUILTIN_WSADH:
19637 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19638 case ARM_BUILTIN_WSADBZ:
19639 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19640 case ARM_BUILTIN_WSADHZ:
19641 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19643 /* Several three-argument builtins. */
19644 case ARM_BUILTIN_WMACS:
19645 case ARM_BUILTIN_WMACU:
19646 case ARM_BUILTIN_WALIGN:
19647 case ARM_BUILTIN_TMIA:
19648 case ARM_BUILTIN_TMIAPH:
19649 case ARM_BUILTIN_TMIATT:
19650 case ARM_BUILTIN_TMIATB:
19651 case ARM_BUILTIN_TMIABT:
19652 case ARM_BUILTIN_TMIABB:
19653 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19654 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19655 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19656 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19657 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19658 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19659 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19660 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19661 : CODE_FOR_iwmmxt_walign);
19662 arg0 = CALL_EXPR_ARG (exp, 0);
19663 arg1 = CALL_EXPR_ARG (exp, 1);
19664 arg2 = CALL_EXPR_ARG (exp, 2);
19665 op0 = expand_normal (arg0);
19666 op1 = expand_normal (arg1);
19667 op2 = expand_normal (arg2);
19668 tmode = insn_data[icode].operand[0].mode;
19669 mode0 = insn_data[icode].operand[1].mode;
19670 mode1 = insn_data[icode].operand[2].mode;
19671 mode2 = insn_data[icode].operand[3].mode;
19673 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19674 op0 = copy_to_mode_reg (mode0, op0);
19675 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19676 op1 = copy_to_mode_reg (mode1, op1);
19677 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19678 op2 = copy_to_mode_reg (mode2, op2);
  if (target == 0
      || GET_MODE (target) != tmode
19681 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19682 target = gen_reg_rtx (tmode);
19683 pat = GEN_FCN (icode) (target, op0, op1, op2);
19689 case ARM_BUILTIN_WZERO:
19690 target = gen_reg_rtx (DImode);
19691 emit_insn (gen_iwmmxt_clrdi (target));
19694 case ARM_BUILTIN_THREAD_POINTER:
19695 return arm_load_tp (target);
19701 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19702 if (d->code == (const enum arm_builtins) fcode)
19703 return arm_expand_binop_builtin (d->icode, exp, target);
19705 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19706 if (d->code == (const enum arm_builtins) fcode)
19707 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19709 /* @@@ Should really do something sensible here. */
19713 /* Return the number (counting from 0) of
19714 the least significant set bit in MASK. */
static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0; (mask & (1 << bit)) == 0; ++bit)
    continue;

  return bit;
}
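/* Editor's note, not part of GCC: the helper above is just a count of
   trailing zero bits, so for nonzero masks it agrees with GCC's
   __builtin_ctz.  A quick check:  */
#if 0
#include <assert.h>

int
main (void)
{
  assert (number_of_first_bit_set (0x28) == 3);	/* 0b101000 */
  assert (number_of_first_bit_set (0x28) == __builtin_ctz (0x28));
  assert (number_of_first_bit_set (1) == 0);
  return 0;
}
#endif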
19729 /* Emit code to push or pop registers to or from the stack. F is the
19730 assembly file. MASK is the registers to push or pop. PUSH is
19731 nonzero if we should push, and zero if we should pop. For debugging
19732 output, if pushing, adjust CFA_OFFSET by the amount of space added
19733 to the stack. REAL_REGS should have the same number of bits set as
19734 MASK, and will be used instead (in the same order) to describe which
19735 registers were saved - this is used to mark the save slots when we
19736 push high registers after moving them to low registers. */
19738 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19739 unsigned long real_regs)
19742 int lo_mask = mask & 0xFF;
19743 int pushed_words = 0;
19747 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19749 /* Special case.  Do not generate a POP PC statement here, do it in thumb_exit: */
19751 thumb_exit (f, -1);
19755 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19757 fprintf (f, "\t.save\t{");
19758 for (regno = 0; regno < 15; regno++)
19760 if (real_regs & (1 << regno))
19762 if (real_regs & ((1 << regno) -1))
19764 asm_fprintf (f, "%r", regno);
19767 fprintf (f, "}\n");
19770 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19772 /* Look at the low registers first. */
19773 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19777 asm_fprintf (f, "%r", regno);
19779 if ((lo_mask & ~1) != 0)
19786 if (push && (mask & (1 << LR_REGNUM)))
19788 /* Catch pushing the LR. */
19792 asm_fprintf (f, "%r", LR_REGNUM);
19796 else if (!push && (mask & (1 << PC_REGNUM)))
19798 /* Catch popping the PC. */
19799 if (TARGET_INTERWORK || TARGET_BACKTRACE
19800 || crtl->calls_eh_return)
19802 /* The PC is never popped directly, instead
19803 it is popped into r3 and then BX is used. */
19804 fprintf (f, "}\n");
19806 thumb_exit (f, -1);
19815 asm_fprintf (f, "%r", PC_REGNUM);
19819 fprintf (f, "}\n");
19821 if (push && pushed_words && dwarf2out_do_frame ())
19823 char *l = dwarf2out_cfi_label (false);
19824 int pushed_mask = real_regs;
19826 *cfa_offset += pushed_words * 4;
19827 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19830 pushed_mask = real_regs;
19831 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19833 if (pushed_mask & 1)
19834 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
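/* Editor's illustration, not part of GCC: for MASK with bits 4, 5 and
   LR set and PUSH nonzero, the code above emits

       .save	{r4, r5, lr}	@ only when EABI unwind info is wanted
       push	{r4, r5, lr}

   REAL_REGS changes only which registers the unwind annotations and
   DWARF save slots name, not the instruction itself.  */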
19839 /* Generate code to return from a thumb function.
19840 If 'reg_containing_return_addr' is -1, then the return address is
19841 actually on the stack, at the stack pointer. */
19843 thumb_exit (FILE *f, int reg_containing_return_addr)
19845 unsigned regs_available_for_popping;
19846 unsigned regs_to_pop;
19848 unsigned available;
19852 int restore_a4 = FALSE;
19854 /* Compute the registers we need to pop. */
19858 if (reg_containing_return_addr == -1)
19860 regs_to_pop |= 1 << LR_REGNUM;
19864 if (TARGET_BACKTRACE)
19866 /* Restore the (ARM) frame pointer and stack pointer. */
19867 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19871 /* If there is nothing to pop then just emit the BX instruction and return. */
19873 if (pops_needed == 0)
19875 if (crtl->calls_eh_return)
19876 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19878 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19881 /* Otherwise if we are not supporting interworking and we have not created
19882 a backtrace structure and the function was not entered in ARM mode then
19883 just pop the return address straight into the PC. */
19884 else if (!TARGET_INTERWORK
19885 && !TARGET_BACKTRACE
19886 && !is_called_in_ARM_mode (current_function_decl)
19887 && !crtl->calls_eh_return)
19889 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19893 /* Find out how many of the (return) argument registers we can corrupt. */
19894 regs_available_for_popping = 0;
19896 /* If returning via __builtin_eh_return, the bottom three registers
19897 all contain information needed for the return. */
19898 if (crtl->calls_eh_return)
19902 /* We can deduce the registers used from the function's
19903 return value.  This is more reliable than examining
19904 df_regs_ever_live_p () because that will be set if the register is
19905 ever used in the function, not just if the register is used
19906 to hold a return value. */
19908 if (crtl->return_rtx != 0)
19909 mode = GET_MODE (crtl->return_rtx);
19911 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19913 size = GET_MODE_SIZE (mode);
19917 /* In a void function we can use any argument register.
19918 In a function that returns a structure on the stack
19919 we can use the second and third argument registers. */
19920 if (mode == VOIDmode)
19921 regs_available_for_popping =
19922 (1 << ARG_REGISTER (1))
19923 | (1 << ARG_REGISTER (2))
19924 | (1 << ARG_REGISTER (3));
19926 regs_available_for_popping =
19927 (1 << ARG_REGISTER (2))
19928 | (1 << ARG_REGISTER (3));
19930 else if (size <= 4)
19931 regs_available_for_popping =
19932 (1 << ARG_REGISTER (2))
19933 | (1 << ARG_REGISTER (3));
19934 else if (size <= 8)
19935 regs_available_for_popping =
19936 (1 << ARG_REGISTER (3));
19939 /* Match registers to be popped with registers into which we pop them. */
19940 for (available = regs_available_for_popping,
19941 required = regs_to_pop;
19942 required != 0 && available != 0;
19943 available &= ~(available & - available),
19944 required &= ~(required & - required))
19947 /* If we have any popping registers left over, remove them. */
19949 regs_available_for_popping &= ~available;
19951 /* Otherwise if we need another popping register we can use
19952 the fourth argument register. */
19953 else if (pops_needed)
19955 /* If we have not found any free argument registers and
19956 reg a4 contains the return address, we must move it. */
19957 if (regs_available_for_popping == 0
19958 && reg_containing_return_addr == LAST_ARG_REGNUM)
19960 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19961 reg_containing_return_addr = LR_REGNUM;
19963 else if (size > 12)
19965 /* Register a4 is being used to hold part of the return value,
19966 but we have dire need of a free, low register. */
19969 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19972 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19974 /* The fourth argument register is available. */
19975 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19981 /* Pop as many registers as we can. */
19982 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19983 regs_available_for_popping);
19985 /* Process the registers we popped. */
19986 if (reg_containing_return_addr == -1)
19988 /* The return address was popped into the lowest numbered register. */
19989 regs_to_pop &= ~(1 << LR_REGNUM);
19991 reg_containing_return_addr =
19992 number_of_first_bit_set (regs_available_for_popping);
19994 /* Remove this register from the mask of available registers, so that
19995 the return address will not be corrupted by further pops. */
19996 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19999 /* If we popped other registers then handle them here. */
20000 if (regs_available_for_popping)
20004 /* Work out which register currently contains the frame pointer. */
20005 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20007 /* Move it into the correct place. */
20008 asm_fprintf (f, "\tmov\t%r, %r\n",
20009 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20011 /* (Temporarily) remove it from the mask of popped registers. */
20012 regs_available_for_popping &= ~(1 << frame_pointer);
20013 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20015 if (regs_available_for_popping)
20019 /* We popped the stack pointer as well,
20020 find the register that contains it. */
20021 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20023 /* Move it into the stack register. */
20024 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20026 /* At this point we have popped all necessary registers, so
20027 do not worry about restoring regs_available_for_popping
20028 to its correct value:
20030 assert (pops_needed == 0)
20031 assert (regs_available_for_popping == (1 << frame_pointer))
20032 assert (regs_to_pop == (1 << STACK_POINTER)) */
20036 /* Since we have just moved the popped value into the frame
20037 pointer, the popping register is available for reuse, and
20038 we know that we still have the stack pointer left to pop. */
20039 regs_available_for_popping |= (1 << frame_pointer);
20043 /* If we still have registers left on the stack, but we no longer have
20044 any registers into which we can pop them, then we must move the return
20045 address into the link register and make available the register that contained it. */
20047 if (regs_available_for_popping == 0 && pops_needed > 0)
20049 regs_available_for_popping |= 1 << reg_containing_return_addr;
20051 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20052 reg_containing_return_addr);
20054 reg_containing_return_addr = LR_REGNUM;
20057 /* If we have registers left on the stack then pop some more.
20058 We know that at most we will want to pop FP and SP. */
20059 if (pops_needed > 0)
20064 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20065 regs_available_for_popping);
20067 /* We have popped either FP or SP.
20068 Move whichever one it is into the correct register. */
20069 popped_into = number_of_first_bit_set (regs_available_for_popping);
20070 move_to = number_of_first_bit_set (regs_to_pop);
20072 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20074 regs_to_pop &= ~(1 << move_to);
20079 /* If we still have not popped everything then we must have only
20080 had one register available to us and we are now popping the SP. */
20081 if (pops_needed > 0)
20085 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20086 regs_available_for_popping);
20088 popped_into = number_of_first_bit_set (regs_available_for_popping);
20090 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20092 assert (regs_to_pop == (1 << STACK_POINTER))
20093 assert (pops_needed == 1)
20097 /* If necessary restore the a4 register. */
20100 if (reg_containing_return_addr != LR_REGNUM)
20102 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20103 reg_containing_return_addr = LR_REGNUM;
20106 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20109 if (crtl->calls_eh_return)
20110 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20112 /* Return to caller. */
20113 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
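/* Editor's sketch, not part of GCC: the matching loop above pairs each
   register to pop with a register it can be popped into by repeatedly
   stripping the lowest set bit of both masks; in two's complement
   X & -X isolates exactly that bit.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int required = 0x88;	 /* want to pop r3 and r7 */
  unsigned int available = 0x06; /* may clobber r1 and r2 */
  int pops_needed = 2;

  for (; required != 0 && available != 0;
       available &= ~(available & - available),
       required &= ~(required & - required))
    --pops_needed;

  /* Both masks are exhausted after two rounds, so this prints 0.  */
  printf ("%d\n", pops_needed);
  return 0;
}
#endif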
20116 /* Scan INSN just before assembler is output for it.
20117 For Thumb-1, we track the status of the condition codes; this
20118 information is used in the cbranchsi4_insn pattern. */
20120 thumb1_final_prescan_insn (rtx insn)
20122 if (flag_print_asm_name)
20123 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20124 INSN_ADDRESSES (INSN_UID (insn)));
20125 /* Don't overwrite the previous setter when we get to a cbranch. */
20126 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20128 enum attr_conds conds;
20130 if (cfun->machine->thumb1_cc_insn)
20132 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20133 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20136 conds = get_attr_conds (insn);
20137 if (conds == CONDS_SET)
20139 rtx set = single_set (insn);
20140 cfun->machine->thumb1_cc_insn = insn;
20141 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20142 cfun->machine->thumb1_cc_op1 = const0_rtx;
20143 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20144 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20146 rtx src1 = XEXP (SET_SRC (set), 1);
20147 if (src1 == const0_rtx)
20148 cfun->machine->thumb1_cc_mode = CCmode;
20151 else if (conds != CONDS_NOCOND)
20152 cfun->machine->thumb1_cc_insn = NULL_RTX;
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
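/* Editor's worked example, not part of GCC: the test above accepts
   exactly the constants that are an 8-bit value shifted left, i.e. the
   ones a Thumb "mov" plus "lsl" pair can build.  0x12000 passes, since
   it is 0x48 << 10; 0x10001 fails, because bits 0 and 16 cannot fit in
   any 8-bit window.  */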
20173 /* Returns nonzero if the current function contains,
20174 or might contain a far jump. */
20176 thumb_far_jump_used_p (void)
20180 /* This test is only important for leaf functions. */
20181 /* assert (!leaf_function_p ()); */
20183 /* If we have already decided that far jumps may be used,
20184 do not bother checking again, and always return true even if
20185 it turns out that they are not being used. Once we have made
20186 the decision that far jumps are present (and that hence the link
20187 register will be pushed onto the stack) we cannot go back on it. */
20188 if (cfun->machine->far_jump_used)
20191 /* If this function is not being called from the prologue/epilogue
20192 generation code then it must be being called from the
20193 INITIAL_ELIMINATION_OFFSET macro. */
20194 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20196 /* In this case we know that we are being asked about the elimination
20197 of the arg pointer register. If that register is not being used,
20198 then there are no arguments on the stack, and we do not have to
20199 worry that a far jump might force the prologue to push the link
20200 register, changing the stack offsets. In this case we can just
20201 return false, since the presence of far jumps in the function will
20202 not affect stack offsets.
20204 If the arg pointer is live (or if it was live, but has now been
20205 eliminated and so set to dead) then we do have to test to see if
20206 the function might contain a far jump. This test can lead to some
20207 false negatives, since before reload is completed, the length of
20208 branch instructions is not known, so gcc defaults to returning their
20209 longest length, which in turn sets the far jump attribute to true.
20211 A false negative will not result in bad code being generated, but it
20212 will result in a needless push and pop of the link register. We
20213 hope that this does not occur too often.
20215 If we need doubleword stack alignment this could affect the other
20216 elimination offsets so we can't risk getting it wrong. */
20217 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20218 cfun->machine->arg_pointer_live = 1;
20219 else if (!cfun->machine->arg_pointer_live)
20223 /* Check to see if the function contains a branch
20224 insn with the far jump attribute set. */
20225 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20227 if (GET_CODE (insn) == JUMP_INSN
20228 /* Ignore tablejump patterns. */
20229 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20230 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20231 && get_attr_far_jump (insn) == FAR_JUMP_YES
20234 /* Record the fact that we have decided that
20235 the function does use far jumps. */
20236 cfun->machine->far_jump_used = 1;
20244 /* Return nonzero if FUNC must be entered in ARM mode. */
20246 is_called_in_ARM_mode (tree func)
20248 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20250 /* Ignore the problem about functions whose address is taken. */
20251 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20255 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20261 /* Given the stack offsets and register mask in OFFSETS, decide how
20262 many additional registers to push instead of subtracting a constant
20263 from SP. For epilogues the principle is the same except we use pop.
20264 FOR_PROLOGUE indicates which we're generating. */
20266 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20268 HOST_WIDE_INT amount;
20269 unsigned long live_regs_mask = offsets->saved_regs_mask;
20270 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction. */
20272 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20273 /* Then count how many other high registers will need to be pushed. */
20274 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20275 int n_free, reg_base;
20277 if (!for_prologue && frame_pointer_needed)
20278 amount = offsets->locals_base - offsets->saved_regs;
20280 amount = offsets->outgoing_args - offsets->saved_regs;
20282 /* If the stack frame size is 512 exactly, we can save one load
20283 instruction, which should make this a win even when optimizing for speed. */
20285 if (!optimize_size && amount != 512)
20288 /* Can't do this if there are high registers to push. */
20289 if (high_regs_pushed != 0)
20292 /* Shouldn't do it in the prologue if no registers would normally
20293 be pushed at all. In the epilogue, also allow it if we'll have
20294 a pop insn for the PC. */
20297 || TARGET_BACKTRACE
20298 || (live_regs_mask & 1 << LR_REGNUM) == 0
20299 || TARGET_INTERWORK
20300 || crtl->args.pretend_args_size != 0))
20303 /* Don't do this if thumb_expand_prologue wants to emit instructions
20304 between the push and the stack frame allocation. */
20306 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20307 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20314 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20315 live_regs_mask >>= reg_base;
20318 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20319 && (for_prologue || call_used_regs[reg_base + n_free]))
20321 live_regs_mask >>= 1;
20327 gcc_assert (amount / 4 * 4 == amount);
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;

  return 0;
}
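/* Editor's worked example, not part of GCC: with AMOUNT = 516 and two
   usable low registers (N_FREE = 2), 516 - 2 * 4 = 508 < 512, so the
   function returns (516 - 508) / 4 = 2.  Pushing those two extra
   registers shrinks the remaining adjustment to 508 bytes, the largest
   value a single Thumb-1 "sub sp, #imm" can encode.  */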
20336 /* The bits which aren't usefully expanded as rtl. */
20338 thumb_unexpanded_epilogue (void)
20340 arm_stack_offsets *offsets;
20342 unsigned long live_regs_mask = 0;
20343 int high_regs_pushed = 0;
20345 int had_to_push_lr;
20348 if (cfun->machine->return_used_this_function != 0)
20351 if (IS_NAKED (arm_current_func_type ()))
20354 offsets = arm_get_frame_offsets ();
20355 live_regs_mask = offsets->saved_regs_mask;
20356 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20358 /* We can deduce the registers used from the function's return value.
20359 This is more reliable than examining df_regs_ever_live_p () because that
20360 will be set if the register is ever used in the function, not just if
20361 the register is used to hold a return value. */
20362 size = arm_size_return_regs ();
20364 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20367 unsigned long extra_mask = (1 << extra_pop) - 1;
20368 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20371 /* The prologue may have pushed some high registers to use as
20372 work registers.  e.g. the testsuite file:
20373 gcc/testsuite/gcc.c-torture/execute/complex-2.c
20374 compiles to produce:
20375 push {r4, r5, r6, r7, lr}
20379 as part of the prologue.  We have to undo that pushing here. */
20381 if (high_regs_pushed)
20383 unsigned long mask = live_regs_mask & 0xff;
20386 /* The available low registers depend on the size of the value we are returning. */
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");
20399 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20400 if (live_regs_mask & (1 << next_hi_reg))
20403 while (high_regs_pushed)
20405 /* Find lo register(s) into which the high register(s) can be popped. */
20407 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20409 if (mask & (1 << regno))
20410 high_regs_pushed--;
20411 if (high_regs_pushed == 0)
20415 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20417 /* Pop the values into the low register(s). */
20418 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20420 /* Move the value(s) into the high registers. */
20421 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20423 if (mask & (1 << regno))
20425 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20428 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20429 if (live_regs_mask & (1 << next_hi_reg))
20434 live_regs_mask &= ~0x0f00;
20437 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20438 live_regs_mask &= 0xff;
20440 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20442 /* Pop the return address into the PC. */
20443 if (had_to_push_lr)
20444 live_regs_mask |= 1 << PC_REGNUM;
20446 /* Either no argument registers were pushed or a backtrace
20447 structure was created which includes an adjusted stack
20448 pointer, so just pop everything. */
20449 if (live_regs_mask)
20450 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20453 /* We have either just popped the return address into the
20454 PC or it was kept in LR for the entire function.
20455 Note that thumb_pushpop has already called thumb_exit if the
20456 PC was in the list. */
20457 if (!had_to_push_lr)
20458 thumb_exit (asm_out_file, LR_REGNUM);
20462 /* Pop everything but the return address. */
20463 if (live_regs_mask)
20464 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20467 if (had_to_push_lr)
20471 /* We have no free low regs, so save one. */
20472 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20476 /* Get the return address into a temporary register. */
20477 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20478 1 << LAST_ARG_REGNUM);
20482 /* Move the return address to lr. */
20483 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20485 /* Restore the low register. */
20486 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20491 regno = LAST_ARG_REGNUM;
20496 /* Remove the argument registers that were pushed onto the stack. */
20497 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20498 SP_REGNUM, SP_REGNUM,
20499 crtl->args.pretend_args_size);
20501 thumb_exit (asm_out_file, regno);
20507 /* Functions to save and restore machine-specific function data. */
20508 static struct machine_function *
20509 arm_init_machine_status (void)
20511 struct machine_function *machine;
20512 machine = ggc_alloc_cleared_machine_function ();
20514 #if ARM_FT_UNKNOWN != 0
20515 machine->func_type = ARM_FT_UNKNOWN;
20520 /* Return an RTX indicating where the return address to the
20521 calling function can be found. */
20523 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20528 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20531 /* Do anything needed before RTL is emitted for each function. */
20533 arm_init_expanders (void)
20535 /* Arrange to initialize and mark the machine per-function status. */
20536 init_machine_status = arm_init_machine_status;
20538 /* This is to stop the combine pass optimizing away the alignment
20539 adjustment of va_arg. */
20540 /* ??? It is claimed that this should not be necessary. */
20542 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20546 /* Like arm_compute_initial_elimination_offset.  Simpler because there
20547 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20548 to point at the base of the local variables after static stack
20549 space for a function has been allocated. */
20552 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20554 arm_stack_offsets *offsets;
20556 offsets = arm_get_frame_offsets ();
20560 case ARG_POINTER_REGNUM:
20563 case STACK_POINTER_REGNUM:
20564 return offsets->outgoing_args - offsets->saved_args;
20566 case FRAME_POINTER_REGNUM:
20567 return offsets->soft_frame - offsets->saved_args;
20569 case ARM_HARD_FRAME_POINTER_REGNUM:
20570 return offsets->saved_regs - offsets->saved_args;
20572 case THUMB_HARD_FRAME_POINTER_REGNUM:
20573 return offsets->locals_base - offsets->saved_args;
20576 gcc_unreachable ();
20580 case FRAME_POINTER_REGNUM:
20583 case STACK_POINTER_REGNUM:
20584 return offsets->outgoing_args - offsets->soft_frame;
20586 case ARM_HARD_FRAME_POINTER_REGNUM:
20587 return offsets->saved_regs - offsets->soft_frame;
20589 case THUMB_HARD_FRAME_POINTER_REGNUM:
20590 return offsets->locals_base - offsets->soft_frame;
20593 gcc_unreachable ();
20598 gcc_unreachable ();
20602 /* Generate the rest of a function's prologue. */
20604 thumb1_expand_prologue (void)
20608 HOST_WIDE_INT amount;
20609 arm_stack_offsets *offsets;
20610 unsigned long func_type;
20612 unsigned long live_regs_mask;
20614 func_type = arm_current_func_type ();
20616 /* Naked functions don't have prologues. */
20617 if (IS_NAKED (func_type))
20620 if (IS_INTERRUPT (func_type))
20622 error ("interrupt Service Routines cannot be coded in Thumb mode");
20626 offsets = arm_get_frame_offsets ();
20627 live_regs_mask = offsets->saved_regs_mask;
20628 /* Load the pic register before setting the frame pointer,
20629 so we can use r7 as a temporary work register. */
20630 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20631 arm_load_pic_register (live_regs_mask);
20633 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20634 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20635 stack_pointer_rtx);
20637 if (flag_stack_usage)
20638 current_function_static_stack_size
20639 = offsets->outgoing_args - offsets->saved_args;
20641 amount = offsets->outgoing_args - offsets->saved_regs;
20642 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20647 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20648 GEN_INT (- amount)));
20649 RTX_FRAME_RELATED_P (insn) = 1;
20655 /* The stack decrement is too big for an immediate value in a single
20656 insn. In theory we could issue multiple subtracts, but after
20657 three of them it becomes more space efficient to place the full
20658 value in the constant pool and load into a register. (Also the
20659 ARM debugger really likes to see only one stack decrement per
20660 function). So instead we look for a scratch register into which
20661 we can load the decrement, and then we subtract this from the
20662 stack pointer.  Unfortunately, on Thumb the only available
20663 scratch registers are the argument registers, and we cannot use
20664 these as they may hold arguments to the function. Instead we
20665 attempt to locate a call preserved register which is used by this
20666 function. If we can find one, then we know that it will have
20667 been pushed at the start of the prologue and so we can corrupt it now. */
20669 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20670 if (live_regs_mask & (1 << regno))
20673 gcc_assert (regno <= LAST_LO_REGNUM);
20675 reg = gen_rtx_REG (SImode, regno);
20677 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20679 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20680 stack_pointer_rtx, reg));
20681 RTX_FRAME_RELATED_P (insn) = 1;
20682 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20683 plus_constant (stack_pointer_rtx,
20685 RTX_FRAME_RELATED_P (dwarf) = 1;
20686 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20690 if (frame_pointer_needed)
20691 thumb_set_frame_pointer (offsets);
20693 /* If we are profiling, make sure no instructions are scheduled before
20694 the call to mcount. Similarly if the user has requested no
20695 scheduling in the prologue.  Similarly if we want non-call exceptions
20696 using the EABI unwinder, to prevent faulting instructions from being
20697 swapped with a stack adjustment. */
20698 if (crtl->profile || !TARGET_SCHED_PROLOG
20699 || (arm_except_unwind_info (&global_options) == UI_TARGET
20700 && cfun->can_throw_non_call_exceptions))
20701 emit_insn (gen_blockage ());
20703 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20704 if (live_regs_mask & 0xff)
20705 cfun->machine->lr_save_eliminated = 0;
20710 thumb1_expand_epilogue (void)
20712 HOST_WIDE_INT amount;
20713 arm_stack_offsets *offsets;
20716 /* Naked functions don't have epilogues. */
20717 if (IS_NAKED (arm_current_func_type ()))
20720 offsets = arm_get_frame_offsets ();
20721 amount = offsets->outgoing_args - offsets->saved_regs;
20723 if (frame_pointer_needed)
20725 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20726 amount = offsets->locals_base - offsets->saved_regs;
20728 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20730 gcc_assert (amount >= 0);
20734 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20735 GEN_INT (amount)));
20738 /* r3 is always free in the epilogue. */
20739 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20741 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20742 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20746 /* Emit a USE (stack_pointer_rtx), so that
20747 the stack adjustment will not be deleted. */
20748 emit_insn (gen_prologue_use (stack_pointer_rtx));
20750 if (crtl->profile || !TARGET_SCHED_PROLOG)
20751 emit_insn (gen_blockage ());
20753 /* Emit a clobber for each register that will be restored in the epilogue,
20754 so that flow2 will get register lifetimes correct. */
20755 for (regno = 0; regno < 13; regno++)
20756 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20757 emit_clobber (gen_rtx_REG (SImode, regno));
20759 if (! df_regs_ever_live_p (LR_REGNUM))
20760 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20764 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20766 arm_stack_offsets *offsets;
20767 unsigned long live_regs_mask = 0;
20768 unsigned long l_mask;
20769 unsigned high_regs_pushed = 0;
20770 int cfa_offset = 0;
20773 if (IS_NAKED (arm_current_func_type ()))
20776 if (is_called_in_ARM_mode (current_function_decl))
20780 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20781 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20783 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20785 /* Generate code sequence to switch us into Thumb mode. */
20786 /* The .code 32 directive has already been emitted by
20787 ASM_DECLARE_FUNCTION_NAME. */
20788 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20789 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20791 /* Generate a label, so that the debugger will notice the
20792 change in instruction sets. This label is also used by
20793 the assembler to bypass the ARM code when this function
20794 is called from a Thumb encoded function elsewhere in the
20795 same file. Hence the definition of STUB_NAME here must
20796 agree with the definition in gas/config/tc-arm.c. */
20798 #define STUB_NAME ".real_start_of"
20800 fprintf (f, "\t.code\t16\n");
20802 if (arm_dllexport_name_p (name))
20803 name = arm_strip_name_encoding (name);
20805 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20806 fprintf (f, "\t.thumb_func\n");
20807 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20810 if (crtl->args.pretend_args_size)
20812 /* Output unwind directive for the stack adjustment. */
20813 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20814 fprintf (f, "\t.pad #%d\n",
20815 crtl->args.pretend_args_size);
20817 if (cfun->machine->uses_anonymous_args)
20821 fprintf (f, "\tpush\t{");
20823 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20825 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20826 regno <= LAST_ARG_REGNUM;
20828 asm_fprintf (f, "%r%s", regno,
20829 regno == LAST_ARG_REGNUM ? "" : ", ");
20831 fprintf (f, "}\n");
20834 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20835 SP_REGNUM, SP_REGNUM,
20836 crtl->args.pretend_args_size);
20838 /* We don't need to record the stores for unwinding (would it
20839 help the debugger any if we did?), but record the change in
20840 the stack pointer. */
20841 if (dwarf2out_do_frame ())
20843 char *l = dwarf2out_cfi_label (false);
20845 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20846 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20850 /* Get the registers we are going to push. */
20851 offsets = arm_get_frame_offsets ();
20852 live_regs_mask = offsets->saved_regs_mask;
20853 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20854 l_mask = live_regs_mask & 0x40ff;
20855 /* Then count how many other high registers will need to be pushed. */
20856 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20858 if (TARGET_BACKTRACE)
20861 unsigned work_register;
20863 /* We have been asked to create a stack backtrace structure.
20864 The code looks like this:
20868 0 sub SP, #16 Reserve space for 4 registers.
20869 2 push {R7} Push low registers.
20870 4 add R7, SP, #20 Get the stack pointer before the push.
20871 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20872 8 mov R7, PC Get hold of the start of this code plus 12.
20873 10 str R7, [SP, #16] Store it.
20874 12 mov R7, FP Get hold of the current frame pointer.
20875 14 str R7, [SP, #4] Store it.
20876 16 mov R7, LR Get hold of the current return address.
20877 18 str R7, [SP, #12] Store it.
20878 20 add R7, SP, #16 Point at the start of the backtrace structure.
20879 22 mov FP, R7 Put this value into the frame pointer. */
20881 work_register = thumb_find_work_register (live_regs_mask);
20883 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20884 asm_fprintf (f, "\t.pad #16\n");
20887 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20888 SP_REGNUM, SP_REGNUM);
20890 if (dwarf2out_do_frame ())
20892 char *l = dwarf2out_cfi_label (false);
20894 cfa_offset = cfa_offset + 16;
20895 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20900 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20901 offset = bit_count (l_mask) * UNITS_PER_WORD;
20906 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20907 offset + 16 + crtl->args.pretend_args_size);
20909 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20912 /* Make sure that the instruction fetching the PC is in the right place
20913 to calculate "start of backtrace creation code + 12". */
20916 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20917 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20919 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20920 ARM_HARD_FRAME_POINTER_REGNUM);
20921 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20926 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20927 ARM_HARD_FRAME_POINTER_REGNUM);
20928 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20930 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20931 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20935 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20936 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20938 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20940 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20941 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20943 /* Optimization: If we are not pushing any low registers but we are going
20944 to push some high registers then delay our first push. This will just
20945 be a push of LR and we can combine it with the push of the first high register. */
20947 else if ((l_mask & 0xff) != 0
20948 || (high_regs_pushed == 0 && l_mask))
20950 unsigned long mask = l_mask;
20951 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20952 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20955 if (high_regs_pushed)
20957 unsigned pushable_regs;
20958 unsigned next_hi_reg;
20960 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20961 if (live_regs_mask & (1 << next_hi_reg))
20964 pushable_regs = l_mask & 0xff;
20966 if (pushable_regs == 0)
20967 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20969 while (high_regs_pushed > 0)
20971 unsigned long real_regs_mask = 0;
20973 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20975 if (pushable_regs & (1 << regno))
20977 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20979 high_regs_pushed --;
20980 real_regs_mask |= (1 << next_hi_reg);
20982 if (high_regs_pushed)
20984 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20986 if (live_regs_mask & (1 << next_hi_reg))
20991 pushable_regs &= ~((1 << regno) - 1);
20997 /* If we had to find a work register and we have not yet
20998 saved the LR then add it to the list of regs to push. */
20999 if (l_mask == (1 << LR_REGNUM))
21001 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21003 real_regs_mask | (1 << LR_REGNUM));
21007 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21012 /* Handle the case of a double word load into a low register from
21013 a computed memory address. The computed address may involve a
21014 register which is overwritten by the load. */
21016 thumb_load_double_from_address (rtx *operands)
21024 gcc_assert (GET_CODE (operands[0]) == REG);
21025 gcc_assert (GET_CODE (operands[1]) == MEM);
21027 /* Get the memory address. */
21028 addr = XEXP (operands[1], 0);
21030 /* Work out how the memory address is computed. */
21031 switch (GET_CODE (addr))
21034 operands[2] = adjust_address (operands[1], SImode, 4);
21036 if (REGNO (operands[0]) == REGNO (addr))
21038 output_asm_insn ("ldr\t%H0, %2", operands);
21039 output_asm_insn ("ldr\t%0, %1", operands);
21043 output_asm_insn ("ldr\t%0, %1", operands);
21044 output_asm_insn ("ldr\t%H0, %2", operands);
21049 /* Compute <address> + 4 for the high order load. */
21050 operands[2] = adjust_address (operands[1], SImode, 4);
21052 output_asm_insn ("ldr\t%0, %1", operands);
21053 output_asm_insn ("ldr\t%H0, %2", operands);
21057 arg1 = XEXP (addr, 0);
21058 arg2 = XEXP (addr, 1);
21060 if (CONSTANT_P (arg1))
21061 base = arg2, offset = arg1;
21063 base = arg1, offset = arg2;
21065 gcc_assert (GET_CODE (base) == REG);
21067 /* Catch the case of <address> = <reg> + <reg> */
21068 if (GET_CODE (offset) == REG)
21070 int reg_offset = REGNO (offset);
21071 int reg_base = REGNO (base);
21072 int reg_dest = REGNO (operands[0]);
21074 /* Add the base and offset registers together into the
21075 higher destination register. */
21076 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21077 reg_dest + 1, reg_base, reg_offset);
21079 /* Load the lower destination register from the address in
21080 the higher destination register. */
21081 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21082 reg_dest, reg_dest + 1);
21084 /* Load the higher destination register from its own address plus 4. */
21086 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21087 reg_dest + 1, reg_dest + 1);
21091 /* Compute <address> + 4 for the high order load. */
21092 operands[2] = adjust_address (operands[1], SImode, 4);
21094 /* If the computed address is held in the low order register
21095 then load the high order register first, otherwise always
21096 load the low order register first. */
21097 if (REGNO (operands[0]) == REGNO (base))
21099 output_asm_insn ("ldr\t%H0, %2", operands);
21100 output_asm_insn ("ldr\t%0, %1", operands);
21104 output_asm_insn ("ldr\t%0, %1", operands);
21105 output_asm_insn ("ldr\t%H0, %2", operands);
21111 /* With no registers to worry about we can just load the value directly. */
21113 operands[2] = adjust_address (operands[1], SImode, 4);
21115 output_asm_insn ("ldr\t%H0, %2", operands);
21116 output_asm_insn ("ldr\t%0, %1", operands);
21120 gcc_unreachable ();
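/* Editor's illustration, not part of GCC: the overlap case above in
   concrete terms.  Loading the DImode pair r0/r1 from the address in
   r0 must fetch the high word first, since the low-word load destroys
   the base:

       ldr	r1, [r0, #4]	@ high word while the base is intact
       ldr	r0, [r0]	@ low word last, clobbering the base

   When the base register is not also the low destination, the natural
   low-then-high order is used instead.  */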
21127 thumb_output_move_mem_multiple (int n, rtx *operands)
21134 if (REGNO (operands[4]) > REGNO (operands[5]))
21137 operands[4] = operands[5];
21140 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21141 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21145 if (REGNO (operands[4]) > REGNO (operands[5]))
21148 operands[4] = operands[5];
21151 if (REGNO (operands[5]) > REGNO (operands[6]))
21154 operands[5] = operands[6];
21157 if (REGNO (operands[4]) > REGNO (operands[5]))
21160 operands[4] = operands[5];
21164 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21165 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21169 gcc_unreachable ();
21175 /* Output a call-via instruction for thumb state. */
21177 thumb_call_via_reg (rtx reg)
21179 int regno = REGNO (reg);
21182 gcc_assert (regno < LR_REGNUM);
21184 /* If we are in the normal text section we can use a single instance
21185 per compilation unit. If we are doing function sections, then we need
21186 an entry per section, since we can't rely on reachability. */
21187 if (in_section == text_section)
21189 thumb_call_reg_needed = 1;
21191 if (thumb_call_via_label[regno] == NULL)
21192 thumb_call_via_label[regno] = gen_label_rtx ();
21193 labelp = thumb_call_via_label + regno;
21197 if (cfun->machine->call_via[regno] == NULL)
21198 cfun->machine->call_via[regno] = gen_label_rtx ();
21199 labelp = cfun->machine->call_via + regno;
21202 output_asm_insn ("bl\t%a0", labelp);
21206 /* Routines for generating rtl. */
21208 thumb_expand_movmemqi (rtx *operands)
21210 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21211 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21212 HOST_WIDE_INT len = INTVAL (operands[2]);
21213 HOST_WIDE_INT offset = 0;
21217 emit_insn (gen_movmem12b (out, in, out, in));
21223 emit_insn (gen_movmem8b (out, in, out, in));
21229 rtx reg = gen_reg_rtx (SImode);
21230 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21231 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
			    reg));
      offset += 2;
      len -= 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
			    reg));
    }
}
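/* Editor's sketch, not part of GCC: the chunking strategy used by
   thumb_expand_movmemqi in plain C.  The expander emits 12- and 8-byte
   block moves while they fit, then a word, halfword and byte tail.  */
#if 0
#include <stdio.h>

static void
chunk_plan (long len)
{
  long offset = 0;

  while (len >= 12)
    {
      printf ("12-byte block at offset %ld\n", offset);
      offset += 12, len -= 12;
    }
  if (len >= 8)
    {
      printf ("8-byte block at offset %ld\n", offset);
      offset += 8, len -= 8;
    }
  if (len >= 4)
    {
      printf ("word at offset %ld\n", offset);
      offset += 4, len -= 4;
    }
  if (len >= 2)
    {
      printf ("halfword at offset %ld\n", offset);
      offset += 2, len -= 2;
    }
  if (len)
    printf ("byte at offset %ld\n", offset);
}

int
main (void)
{
  chunk_plan (23);	/* 12 + 8 + 2 + 1 */
  return 0;
}
#endif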
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
21270 /* Return the length of a function name prefix
21271 that starts with the character 'c'. */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}
21282 /* Return a pointer to a function's name with any
21283 and all prefix encodings stripped from it. */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
21295 /* If there is a '*' anywhere in the name's prefix, then
21296 emit the stripped name verbatim, otherwise prepend an
21297 underscore if leading underscores are being used. */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
21317 arm_file_start (void)
21321 if (TARGET_UNIFIED_ASM)
21322 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21326 const char *fpu_name;
21327 if (arm_selected_arch)
21328 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21330 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21332 if (TARGET_SOFT_FLOAT)
21335 fpu_name = "softvfp";
21337 fpu_name = "softfpa";
21341 fpu_name = arm_fpu_desc->name;
21342 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21344 if (TARGET_HARD_FLOAT)
21345 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21346 if (TARGET_HARD_FLOAT_ABI)
21347 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21350 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21352 /* Some of these attributes only apply when the corresponding features
21353 are used. However we don't have any easy way of figuring this out.
21354 Conservatively record the setting that would have been used. */
21356 /* Tag_ABI_FP_rounding. */
21357 if (flag_rounding_math)
21358 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21359 if (!flag_unsafe_math_optimizations)
21361 /* Tag_ABI_FP_denormal. */
21362 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21363 /* Tag_ABI_FP_exceptions. */
21364 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21366 /* Tag_ABI_FP_user_exceptions. */
21367 if (flag_signaling_nans)
21368 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21369 /* Tag_ABI_FP_number_model. */
21370 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21371 flag_finite_math_only ? 1 : 3);
21373 /* Tag_ABI_align8_needed. */
21374 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21375 /* Tag_ABI_align8_preserved. */
21376 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21377 /* Tag_ABI_enum_size. */
21378 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21379 flag_short_enums ? 1 : 2);
21381 /* Tag_ABI_optimization_goals. */
21384 else if (optimize >= 2)
21390 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21392 /* Tag_ABI_FP_16bit_format. */
21393 if (arm_fp16_format)
21394 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21395 (int)arm_fp16_format);
21397 if (arm_lang_output_object_attributes_hook)
21398 arm_lang_output_object_attributes_hook ();
21400 default_file_start ();
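/* Editor's illustration, not part of GCC: for an EABI target compiled
   with, say, -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp at -O2 (the
   particular names are only an example), the routine above produces an
   assembly preamble along the lines of

       .syntax unified
       .cpu cortex-a8
       .eabi_attribute 27, 3
       .fpu neon
       .eabi_attribute 20, 1
       .eabi_attribute 21, 1
       .eabi_attribute 23, 3
       .eabi_attribute 24, 1
       .eabi_attribute 25, 1
       .eabi_attribute 26, 2
       .eabi_attribute 30, 2

   with each attribute value tracking the command-line options as the
   comments above describe.  */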
21404 arm_file_end (void)
21408 if (NEED_INDICATE_EXEC_STACK)
21409 /* Add .note.GNU-stack. */
21410 file_end_indicate_exec_stack ();
21412 if (! thumb_call_reg_needed)
21415 switch_to_section (text_section);
21416 asm_fprintf (asm_out_file, "\t.code 16\n");
21417 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21419 for (regno = 0; regno < LR_REGNUM; regno++)
21421 rtx label = thumb_call_via_label[regno];
21425 targetm.asm_out.internal_label (asm_out_file, "L",
21426 CODE_LABEL_NUMBER (label));
21427 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21433 /* Symbols in the text segment can be accessed without indirecting via the
21434 constant pool; it may take an extra binary operation, but this is still
21435 faster than indirecting via memory. Don't do this when not optimizing,
21436 since we won't be calculating all of the offsets necessary to do this when optimizing. */
21440 arm_encode_section_info (tree decl, rtx rtl, int first)
21442 if (optimize > 0 && TREE_CONSTANT (decl))
21443 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21445 default_encode_section_info (decl, rtl, first);
21447 #endif /* !ARM_PE */
21450 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21452 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21453 && !strcmp (prefix, "L"))
21455 arm_ccfsm_state = 0;
21456 arm_target_insn = NULL;
21458 default_internal_label (stream, prefix, labelno);
21461 /* Output code to add DELTA to the first argument, and then jump
21462 to FUNCTION. Used for C++ multiple inheritance. */
21464 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21465 HOST_WIDE_INT delta,
21466 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21469 static int thunk_label = 0;
21472 int mi_delta = delta;
21473 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21475 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21478 mi_delta = - mi_delta;
21482 int labelno = thunk_label++;
21483 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21484 /* Thunks are entered in ARM mode when available. */
21485 if (TARGET_THUMB1_ONLY)
21487 /* push r3 so we can use it as a temporary. */
21488 /* TODO: Omit this save if r3 is not used. */
21489 fputs ("\tpush {r3}\n", file);
21490 fputs ("\tldr\tr3, ", file);
21494 fputs ("\tldr\tr12, ", file);
21496 assemble_name (file, label);
21497 fputc ('\n', file);
21500 /* If we are generating PIC, the ldr instruction below loads
21501 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21502 the address of the add + 8, so we have:
21504 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) = target + 1.
21507 Note that we have "+ 1" because some versions of GNU ld
21508 don't set the low bit of the result for R_ARM_REL32
21509 relocations against thumb function symbols.
21510 On ARMv6M this is +4, not +8. */
21511 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21512 assemble_name (file, labelpc);
21513 fputs (":\n", file);
21514 if (TARGET_THUMB1_ONLY)
21516 /* This is 2 insns after the start of the thunk, so we know it
21517 is 4-byte aligned. */
21518 fputs ("\tadd\tr3, pc, r3\n", file);
21519 fputs ("\tmov r12, r3\n", file);
21522 fputs ("\tadd\tr12, pc, r12\n", file);
21524 else if (TARGET_THUMB1_ONLY)
21525 fputs ("\tmov r12, r3\n", file);
21527 if (TARGET_THUMB1_ONLY)
21529 if (mi_delta > 255)
21531 fputs ("\tldr\tr3, ", file);
21532 assemble_name (file, label);
21533 fputs ("+4\n", file);
21534 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21535 mi_op, this_regno, this_regno);
21537 else if (mi_delta != 0)
21539 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21540 mi_op, this_regno, this_regno,
21546 /* TODO: Use movw/movt for large constants when available. */
21547 while (mi_delta != 0)
21549 if ((mi_delta & (3 << shift)) == 0)
21553 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21554 mi_op, this_regno, this_regno,
21555 mi_delta & (0xff << shift));
21556 mi_delta &= ~(0xff << shift);
21563 if (TARGET_THUMB1_ONLY)
21564 fputs ("\tpop\t{r3}\n", file);
21566 fprintf (file, "\tbx\tr12\n");
21567 ASM_OUTPUT_ALIGN (file, 2);
21568 assemble_name (file, label);
21569 fputs (":\n", file);
21572 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21573 rtx tem = XEXP (DECL_RTL (function), 0);
21574 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21575 tem = gen_rtx_MINUS (GET_MODE (tem),
21577 gen_rtx_SYMBOL_REF (Pmode,
21578 ggc_strdup (labelpc)));
21579 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21582 /* Output ".word .LTHUNKn". */
21583 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21585 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21586 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
21590 fputs ("\tb\t", file);
21591 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21592 if (NEED_PLT_RELOC)
21593 fputs ("(PLT)", file);
21594 fputc ('\n', file);
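/* Editor's illustration, not part of GCC: for an ARM-state, non-PIC
   thunk adjusting "this" in r0 by DELTA = 4, the code above boils down
   to just

       add	r0, r0, #4
       b	_ZN1D1fEv(PLT)

   (the symbol is a made-up example, and "(PLT)" appears only when
   NEED_PLT_RELOC holds); the label/ldr sequence earlier in the function
   is used only when the thunk itself is compiled as Thumb-1, with an
   extra PC-relative fixup when PIC is in effect.  */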
21599 arm_emit_vector_const (FILE *file, rtx x)
21602 const char * pattern;
21604 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21606 switch (GET_MODE (x))
21608 case V2SImode: pattern = "%08x"; break;
21609 case V4HImode: pattern = "%04x"; break;
21610 case V8QImode: pattern = "%02x"; break;
21611 default: gcc_unreachable ();
21614 fprintf (file, "0x");
21615 for (i = CONST_VECTOR_NUNITS (x); i--;)
21619 element = CONST_VECTOR_ELT (x, i);
21620 fprintf (file, pattern, INTVAL (element));
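/* Editor's worked example, not part of GCC: the loop above walks the
   elements from the highest index down, so a V4HImode constant holding
   {1, 2, 3, 4} (element 0 first) comes out as the single literal
   0x0004000300020001.  */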
21626 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21627 HFmode constant pool entries are actually loaded with ldr. */
21629 arm_emit_fp16_const (rtx c)
21634 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21635 bits = real_to_target (NULL, &r, HFmode);
21636 if (WORDS_BIG_ENDIAN)
21637 assemble_zeros (2);
21638 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21639 if (!WORDS_BIG_ENDIAN)
21640 assemble_zeros (2);
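/* Editor's worked example, not part of GCC: IEEE half-precision 1.0 is
   the 16-bit pattern 0x3c00.  On a little-endian target the routine
   above emits that halfword followed by two bytes of padding; with
   WORDS_BIG_ENDIAN the padding comes first.  Either way the constant
   occupies a full word, which is why it can be fetched with ldr.  */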
21644 arm_output_load_gr (rtx *operands)
21651 if (GET_CODE (operands [1]) != MEM
21652 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21653 || GET_CODE (reg = XEXP (sum, 0)) != REG
21654 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21655 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21656 return "wldrw%?\t%0, %1";
21658 /* Fix up an out-of-range load of a GR register. */
21659 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21660 wcgr = operands[0];
21662 output_asm_insn ("ldr%?\t%0, %1", operands);
21664 operands[0] = wcgr;
21666 output_asm_insn ("tmcr%?\t%0, %1", operands);
21667 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21672 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21674 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21675 named arg and all anonymous args onto the stack.
21676 XXX I know the prologue shouldn't be pushing registers, but it is faster
21677 that way. */
21680 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21681 enum machine_mode mode,
21684 int second_time ATTRIBUTE_UNUSED)
21688 cfun->machine->uses_anonymous_args = 1;
21689 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21691 nregs = pcum->aapcs_ncrn;
21692 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21693 nregs++;
21695 else
21696 nregs = pcum->nregs;
21698 if (nregs < NUM_ARG_REGS)
21699 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
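/* Worked example (illustrative): for "int f (int fmt, ...)" only one core
   register is named, so nregs == 1 and *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD == 12 bytes, making the prologue push r1-r3 so
   that the anonymous arguments sit contiguously on the stack.  */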
21702 /* Return nonzero if the CONSUMER instruction (a store) does not need
21703 PRODUCER's value to calculate the address. */
21706 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21708 rtx value = PATTERN (producer);
21709 rtx addr = PATTERN (consumer);
21711 if (GET_CODE (value) == COND_EXEC)
21712 value = COND_EXEC_CODE (value);
21713 if (GET_CODE (value) == PARALLEL)
21714 value = XVECEXP (value, 0, 0);
21715 value = XEXP (value, 0);
21716 if (GET_CODE (addr) == COND_EXEC)
21717 addr = COND_EXEC_CODE (addr);
21718 if (GET_CODE (addr) == PARALLEL)
21719 addr = XVECEXP (addr, 0, 0);
21720 addr = XEXP (addr, 0);
21722 return !reg_overlap_mentioned_p (value, addr);
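/* For instance (a sketch): with PRODUCER "mul r0, r1, r2" and CONSUMER
   "str r3, [r0]" the address depends on r0 and this returns 0, whereas for
   "str r0, [r3]" only the stored value uses r0, so the address can be
   computed early and the function returns nonzero.  */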
21725 /* Return nonzero if the CONSUMER instruction (a store) does need
21726 PRODUCER's value to calculate the address. */
21729 arm_early_store_addr_dep (rtx producer, rtx consumer)
21731 return !arm_no_early_store_addr_dep (producer, consumer);
21734 /* Return nonzero if the CONSUMER instruction (a load) does need
21735 PRODUCER's value to calculate the address. */
21738 arm_early_load_addr_dep (rtx producer, rtx consumer)
21740 rtx value = PATTERN (producer);
21741 rtx addr = PATTERN (consumer);
21743 if (GET_CODE (value) == COND_EXEC)
21744 value = COND_EXEC_CODE (value);
21745 if (GET_CODE (value) == PARALLEL)
21746 value = XVECEXP (value, 0, 0);
21747 value = XEXP (value, 0);
21748 if (GET_CODE (addr) == COND_EXEC)
21749 addr = COND_EXEC_CODE (addr);
21750 if (GET_CODE (addr) == PARALLEL)
21751 addr = XVECEXP (addr, 0, 0);
21752 addr = XEXP (addr, 1);
21754 return reg_overlap_mentioned_p (value, addr);
21757 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21758 have an early register shift value or amount dependency on the
21759 result of PRODUCER. */
21762 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21764 rtx value = PATTERN (producer);
21765 rtx op = PATTERN (consumer);
21768 if (GET_CODE (value) == COND_EXEC)
21769 value = COND_EXEC_CODE (value);
21770 if (GET_CODE (value) == PARALLEL)
21771 value = XVECEXP (value, 0, 0);
21772 value = XEXP (value, 0);
21773 if (GET_CODE (op) == COND_EXEC)
21774 op = COND_EXEC_CODE (op);
21775 if (GET_CODE (op) == PARALLEL)
21776 op = XVECEXP (op, 0, 0);
21777 op = XEXP (op, 1);
21779 early_op = XEXP (op, 0);
21780 /* This is either an actual independent shift, or a shift applied to
21781 the first operand of another operation. We want the whole shift
21782 operation. */
21783 if (GET_CODE (early_op) == REG)
21784 early_op = op;
21786 return !reg_overlap_mentioned_p (value, early_op);
21789 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21790 have an early register shift value dependency on the result of
21791 PRODUCER. */
21794 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21796 rtx value = PATTERN (producer);
21797 rtx op = PATTERN (consumer);
21800 if (GET_CODE (value) == COND_EXEC)
21801 value = COND_EXEC_CODE (value);
21802 if (GET_CODE (value) == PARALLEL)
21803 value = XVECEXP (value, 0, 0);
21804 value = XEXP (value, 0);
21805 if (GET_CODE (op) == COND_EXEC)
21806 op = COND_EXEC_CODE (op);
21807 if (GET_CODE (op) == PARALLEL)
21808 op = XVECEXP (op, 0, 0);
21809 op = XEXP (op, 1);
21811 early_op = XEXP (op, 0);
21813 /* This is either an actual independent shift, or a shift applied to
21814 the first operand of another operation. We want the value being
21815 shifted, in either case. */
21816 if (GET_CODE (early_op) != REG)
21817 early_op = XEXP (early_op, 0);
21819 return !reg_overlap_mentioned_p (value, early_op);
21822 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21823 have an early register mult dependency on the result of
21824 PRODUCER. */
21827 arm_no_early_mul_dep (rtx producer, rtx consumer)
21829 rtx value = PATTERN (producer);
21830 rtx op = PATTERN (consumer);
21832 if (GET_CODE (value) == COND_EXEC)
21833 value = COND_EXEC_CODE (value);
21834 if (GET_CODE (value) == PARALLEL)
21835 value = XVECEXP (value, 0, 0);
21836 value = XEXP (value, 0);
21837 if (GET_CODE (op) == COND_EXEC)
21838 op = COND_EXEC_CODE (op);
21839 if (GET_CODE (op) == PARALLEL)
21840 op = XVECEXP (op, 0, 0);
21841 op = XEXP (op, 1);
21843 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21845 if (GET_CODE (XEXP (op, 0)) == MULT)
21846 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21847 else
21848 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
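/* Example (a sketch): after "mul r0, r1, r2", the consumer
   "mla r4, r5, r6, r0" uses r0 only as the accumulator, so there is no
   early multiply dependency and this returns nonzero; in
   "mla r4, r0, r6, r5" the result feeds a multiplier input and the
   function returns 0.  */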
21854 /* We can't rely on the caller doing the proper promotion when
21855 using APCS or ATPCS. */
21858 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21860 return !TARGET_AAPCS_BASED;
21863 static enum machine_mode
21864 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21865 enum machine_mode mode,
21866 int *punsignedp ATTRIBUTE_UNUSED,
21867 const_tree fntype ATTRIBUTE_UNUSED,
21868 int for_return ATTRIBUTE_UNUSED)
21870 if (GET_MODE_CLASS (mode) == MODE_INT
21871 && GET_MODE_SIZE (mode) < 4)
21872 return SImode;
21874 return mode;
21877 /* AAPCS based ABIs use short enums by default. */
21880 arm_default_short_enums (void)
21882 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
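/* Effect (illustrative): with short enums an enumeration such as
   "enum e { A = 1 };" occupies a single byte, since the container is the
   smallest integer type that can hold every enumerator.  */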
21886 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21889 arm_align_anon_bitfield (void)
21891 return TARGET_AAPCS_BASED;
21895 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21898 arm_cxx_guard_type (void)
21900 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21903 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21904 has an accumulator dependency on the result of the producer (a
21905 multiplication instruction) and no other dependency on that result. */
21907 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21909 rtx mul = PATTERN (producer);
21910 rtx mac = PATTERN (consumer);
21912 rtx mac_op0, mac_op1, mac_acc;
21914 if (GET_CODE (mul) == COND_EXEC)
21915 mul = COND_EXEC_CODE (mul);
21916 if (GET_CODE (mac) == COND_EXEC)
21917 mac = COND_EXEC_CODE (mac);
21919 /* Check that mul is of the form (set (...) (mult ...))
21920 and mla is of the form (set (...) (plus (mult ...) (...))). */
21921 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21922 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21923 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21924 return 0;
21926 mul_result = XEXP (mul, 0);
21927 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21928 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21929 mac_acc = XEXP (XEXP (mac, 1), 1);
21931 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21932 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21933 && !reg_overlap_mentioned_p (mul_result, mac_op1));
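/* E.g. (a sketch): "mul r0, r1, r2" followed by "mla r5, r3, r4, r0"
   satisfies this test (r0 is the accumulator and nothing else), whereas
   "mla r5, r0, r4, r0" does not, because r0 also feeds the multiply.  */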
21937 /* The EABI says test the least significant bit of a guard variable. */
21940 arm_cxx_guard_mask_bit (void)
21942 return TARGET_AAPCS_BASED;
21946 /* The EABI specifies that all array cookies are 8 bytes long. */
21949 arm_get_cookie_size (tree type)
21953 if (!TARGET_AAPCS_BASED)
21954 return default_cxx_get_cookie_size (type);
21956 size = build_int_cst (sizetype, 8);
21961 /* The EABI says that array cookies should also contain the element size. */
21964 arm_cookie_has_size (void)
21966 return TARGET_AAPCS_BASED;
21970 /* The EABI says constructors and destructors should return a pointer to
21971 the object constructed/destroyed. */
21974 arm_cxx_cdtor_returns_this (void)
21976 return TARGET_AAPCS_BASED;
21979 /* The EABI says that an inline function may never be the key
21980 method. */
21983 arm_cxx_key_method_may_be_inline (void)
21985 return !TARGET_AAPCS_BASED;
21989 arm_cxx_determine_class_data_visibility (tree decl)
21991 if (!TARGET_AAPCS_BASED
21992 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21995 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21996 is exported. However, on systems without dynamic vague linkage,
21997 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21998 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21999 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22001 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22002 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22006 arm_cxx_class_data_always_comdat (void)
22008 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22009 vague linkage if the class has no key function. */
22010 return !TARGET_AAPCS_BASED;
22014 /* The EABI says __aeabi_atexit should be used to register static
22015 destructors. */
22018 arm_cxx_use_aeabi_atexit (void)
22020 return TARGET_AAPCS_BASED;
22025 arm_set_return_address (rtx source, rtx scratch)
22027 arm_stack_offsets *offsets;
22028 HOST_WIDE_INT delta;
22030 unsigned long saved_regs;
22032 offsets = arm_get_frame_offsets ();
22033 saved_regs = offsets->saved_regs_mask;
22035 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22036 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22039 if (frame_pointer_needed)
22040 addr = plus_constant (hard_frame_pointer_rtx, -4);
22043 /* LR will be the first saved register. */
22044 delta = offsets->outgoing_args - (offsets->frame + 4);
22049 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22050 GEN_INT (delta & ~4095)));
22055 addr = stack_pointer_rtx;
22057 addr = plus_constant (addr, delta);
22059 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22065 thumb_set_return_address (rtx source, rtx scratch)
22067 arm_stack_offsets *offsets;
22068 HOST_WIDE_INT delta;
22069 HOST_WIDE_INT limit;
22072 unsigned long mask;
22076 offsets = arm_get_frame_offsets ();
22077 mask = offsets->saved_regs_mask;
22078 if (mask & (1 << LR_REGNUM))
22081 /* Find the saved regs. */
22082 if (frame_pointer_needed)
22084 delta = offsets->soft_frame - offsets->saved_args;
22085 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22091 delta = offsets->outgoing_args - offsets->saved_args;
22094 /* Allow for the stack frame. */
22095 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22097 /* The link register is always the first saved register. */
22100 /* Construct the address. */
22101 addr = gen_rtx_REG (SImode, reg);
22104 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22105 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22109 addr = plus_constant (addr, delta);
22111 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22114 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22117 /* Implements target hook vector_mode_supported_p. */
22119 arm_vector_mode_supported_p (enum machine_mode mode)
22121 /* Neon also supports V2SImode, etc. listed in the clause below. */
22122 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22123 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22126 if ((TARGET_NEON || TARGET_IWMMXT)
22127 && ((mode == V2SImode)
22128 || (mode == V4HImode)
22129 || (mode == V8QImode)))
22135 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22136 registers when autovectorizing for Neon, at least until multiple vector
22137 widths are supported properly by the middle-end. */
22139 static enum machine_mode
22140 arm_preferred_simd_mode (enum machine_mode mode)
22146 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22148 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22150 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22152 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22154 if (TARGET_NEON_VECTORIZE_QUAD)
22161 if (TARGET_REALLY_IWMMXT)
22177 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22179 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22180 using r0-r4 for function arguments and r7 for the stack frame, and not have
22181 enough left over to do doubleword arithmetic. For Thumb-2 all the
22182 potentially problematic instructions accept high registers so this is not
22183 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22184 that require many low registers. */
22186 arm_class_likely_spilled_p (reg_class_t rclass)
22188 if ((TARGET_THUMB1 && rclass == LO_REGS)
22189 || rclass == CC_REG)
22195 /* Implements target hook small_register_classes_for_mode_p. */
22197 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22199 return TARGET_THUMB1;
22202 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22203 ARM insns and therefore guarantee that the shift count is modulo 256.
22204 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22205 guarantee no particular behavior for out-of-range counts. */
22207 static unsigned HOST_WIDE_INT
22208 arm_shift_truncation_mask (enum machine_mode mode)
22210 return mode == SImode ? 255 : 0;
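/* Consequence (illustrative): for SImode the middle-end may drop an
   explicit masking of the shift count, so "x << (n & 255)" can be emitted
   as a plain register-specified shift; DImode shifts get no such
   guarantee and keep the mask.  */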
22214 /* Map internal gcc register numbers to DWARF2 register numbers. */
22217 arm_dbx_register_number (unsigned int regno)
22222 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22223 compatibility. The EABI defines them as registers 96-103. */
22224 if (IS_FPA_REGNUM (regno))
22225 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22227 if (IS_VFP_REGNUM (regno))
22229 /* See comment in arm_dwarf_register_span. */
22230 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22231 return 64 + regno - FIRST_VFP_REGNUM;
22233 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22236 if (IS_IWMMXT_GR_REGNUM (regno))
22237 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22239 if (IS_IWMMXT_REGNUM (regno))
22240 return 112 + regno - FIRST_IWMMXT_REGNUM;
22242 gcc_unreachable ();
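/* Sample mappings under this scheme (a sketch): s1 becomes 65 in the
   legacy 64-95 range, while a VFPv3-only register such as d16 becomes
   256 + 16 == 272, inside the EABI's 256-287 range for D0-D31.  */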
22245 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22246 GCC models them as 64 32-bit registers, so we need to describe this to
22247 the DWARF generation code. Other registers can use the default. */
22249 arm_dwarf_register_span (rtx rtl)
22256 regno = REGNO (rtl);
22257 if (!IS_VFP_REGNUM (regno))
22260 /* XXX FIXME: The EABI defines two VFP register ranges:
22261 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22262 256-287: D0-D31
22263 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22264 corresponding D register. Until GDB supports this, we shall use the
22265 legacy encodings. We also use these encodings for D0-D15 for
22266 compatibility with older debuggers. */
22267 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22270 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22271 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22272 regno = (regno - FIRST_VFP_REGNUM) / 2;
22273 for (i = 0; i < nregs; i++)
22274 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22279 #if ARM_UNWIND_INFO
22280 /* Emit unwind directives for a store-multiple instruction or stack pointer
22281 push during alignment.
22282 These should only ever be generated by the function prologue code, so
22283 expect them to have a particular form. */
22286 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22289 HOST_WIDE_INT offset;
22290 HOST_WIDE_INT nregs;
22296 e = XVECEXP (p, 0, 0);
22297 if (GET_CODE (e) != SET)
22300 /* First insn will adjust the stack pointer. */
22301 if (GET_CODE (e) != SET
22302 || GET_CODE (XEXP (e, 0)) != REG
22303 || REGNO (XEXP (e, 0)) != SP_REGNUM
22304 || GET_CODE (XEXP (e, 1)) != PLUS)
22307 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22308 nregs = XVECLEN (p, 0) - 1;
22310 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22313 /* The function prologue may also push pc, but not annotate it as it is
22314 never restored. We turn this into a stack pointer adjustment. */
22315 if (nregs * 4 == offset - 4)
22317 fprintf (asm_out_file, "\t.pad #4\n");
22321 fprintf (asm_out_file, "\t.save {");
22323 else if (IS_VFP_REGNUM (reg))
22326 fprintf (asm_out_file, "\t.vsave {");
22328 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22330 /* FPA registers are done differently. */
22331 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22335 /* Unknown register type. */
22338 /* If the stack increment doesn't match the size of the saved registers,
22339 something has gone horribly wrong. */
22340 if (offset != nregs * reg_size)
22345 /* The remaining insns will describe the stores. */
22346 for (i = 1; i <= nregs; i++)
22348 /* Expect (set (mem <addr>) (reg)).
22349 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22350 e = XVECEXP (p, 0, i);
22351 if (GET_CODE (e) != SET
22352 || GET_CODE (XEXP (e, 0)) != MEM
22353 || GET_CODE (XEXP (e, 1)) != REG)
22356 reg = REGNO (XEXP (e, 1));
22361 fprintf (asm_out_file, ", ");
22362 /* We can't use %r for vfp because we need to use the
22363 double precision register names. */
22364 if (IS_VFP_REGNUM (reg))
22365 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22367 asm_fprintf (asm_out_file, "%r", reg);
22369 #ifdef ENABLE_CHECKING
22370 /* Check that the addresses are consecutive. */
22371 e = XEXP (XEXP (e, 0), 0);
22372 if (GET_CODE (e) == PLUS)
22374 offset += reg_size;
22375 if (GET_CODE (XEXP (e, 0)) != REG
22376 || REGNO (XEXP (e, 0)) != SP_REGNUM
22377 || GET_CODE (XEXP (e, 1)) != CONST_INT
22378 || offset != INTVAL (XEXP (e, 1)))
22382 || GET_CODE (e) != REG
22383 || REGNO (e) != SP_REGNUM)
22387 fprintf (asm_out_file, "}\n");
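/* As an illustration (not from real output): a prologue
   "push {r4, r5, lr}" reaches this function as a three-store PARALLEL and
   produces "\t.save {r4, r5, lr}\n", while a VFP store-multiple of d8
   would instead produce "\t.vsave {d8}\n".  */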
22390 /* Emit unwind directives for a SET. */
22393 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22401 switch (GET_CODE (e0))
22404 /* Pushing a single register. */
22405 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22406 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22407 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22410 asm_fprintf (asm_out_file, "\t.save ");
22411 if (IS_VFP_REGNUM (REGNO (e1)))
22412 asm_fprintf(asm_out_file, "{d%d}\n",
22413 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22415 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22419 if (REGNO (e0) == SP_REGNUM)
22421 /* A stack increment. */
22422 if (GET_CODE (e1) != PLUS
22423 || GET_CODE (XEXP (e1, 0)) != REG
22424 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22425 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22428 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22429 -INTVAL (XEXP (e1, 1)));
22431 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22433 HOST_WIDE_INT offset;
22435 if (GET_CODE (e1) == PLUS)
22437 if (GET_CODE (XEXP (e1, 0)) != REG
22438 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22440 reg = REGNO (XEXP (e1, 0));
22441 offset = INTVAL (XEXP (e1, 1));
22442 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22443 HARD_FRAME_POINTER_REGNUM, reg,
22446 else if (GET_CODE (e1) == REG)
22449 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22450 HARD_FRAME_POINTER_REGNUM, reg);
22455 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22457 /* Move from sp to reg. */
22458 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22460 else if (GET_CODE (e1) == PLUS
22461 && GET_CODE (XEXP (e1, 0)) == REG
22462 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22463 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22465 /* Set reg to offset from sp. */
22466 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22467 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22469 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22471 /* Stack pointer save before alignment. */
22473 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22486 /* Emit unwind directives for the given insn. */
22489 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22493 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22496 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22497 && (TREE_NOTHROW (current_function_decl)
22498 || crtl->all_throwers_are_sibcalls))
22501 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22504 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22505 if (pat)
22506 pat = XEXP (pat, 0);
22507 else
22508 pat = PATTERN (insn);
22510 switch (GET_CODE (pat))
22513 arm_unwind_emit_set (asm_out_file, pat);
22517 /* Store multiple. */
22518 arm_unwind_emit_sequence (asm_out_file, pat);
22527 /* Output a reference from a function exception table to the type_info
22528 object X. The EABI specifies that the symbol should be relocated by
22529 an R_ARM_TARGET2 relocation. */
22532 arm_output_ttype (rtx x)
22534 fputs ("\t.word\t", asm_out_file);
22535 output_addr_const (asm_out_file, x);
22536 /* Use special relocations for symbol references. */
22537 if (GET_CODE (x) != CONST_INT)
22538 fputs ("(TARGET2)", asm_out_file);
22539 fputc ('\n', asm_out_file);
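/* For example (a sketch): a handler for "int" would emit

	.word	_ZTIi(TARGET2)

   and the linker resolves the R_ARM_TARGET2 relocation, while a catch-all
   entry (const_int 0) is emitted as a plain ".word 0".  */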
22544 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22547 arm_asm_emit_except_personality (rtx personality)
22549 fputs ("\t.personality\t", asm_out_file);
22550 output_addr_const (asm_out_file, personality);
22551 fputc ('\n', asm_out_file);
22554 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22557 arm_asm_init_sections (void)
22559 exception_section = get_unnamed_section (0, output_section_asm_op,
22562 #endif /* ARM_UNWIND_INFO */
22564 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22566 static enum unwind_info_type
22567 arm_except_unwind_info (struct gcc_options *opts)
22569 /* Honor the --enable-sjlj-exceptions configure switch. */
22570 #ifdef CONFIG_SJLJ_EXCEPTIONS
22571 if (CONFIG_SJLJ_EXCEPTIONS)
22575 /* If not using ARM EABI unwind tables... */
22576 if (ARM_UNWIND_INFO)
22578 /* For simplicity elsewhere in this file, indicate that all unwind
22579 info is disabled if we're not emitting unwind tables. */
22580 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22586 /* ... we use sjlj exceptions for backwards compatibility. */
22591 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22592 stack alignment. */
22595 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22597 rtx unspec = SET_SRC (pattern);
22598 gcc_assert (GET_CODE (unspec) == UNSPEC);
22602 case UNSPEC_STACK_ALIGN:
22603 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22604 put anything on the stack, so hopefully it won't matter.
22605 CFA = SP will be correct after alignment. */
22606 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22607 SET_DEST (pattern));
22610 gcc_unreachable ();
22615 /* Output unwind directives for the start/end of a function. */
22618 arm_output_fn_unwind (FILE * f, bool prologue)
22620 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22624 fputs ("\t.fnstart\n", f);
22627 /* If this function will never be unwound, then mark it as such.
22628 The same condition is used in arm_unwind_emit to suppress
22629 the frame annotations. */
22630 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22631 && (TREE_NOTHROW (current_function_decl)
22632 || crtl->all_throwers_are_sibcalls))
22633 fputs("\t.cantunwind\n", f);
22635 fputs ("\t.fnend\n", f);
22640 arm_emit_tls_decoration (FILE *fp, rtx x)
22642 enum tls_reloc reloc;
22645 val = XVECEXP (x, 0, 0);
22646 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22648 output_addr_const (fp, val);
22653 fputs ("(tlsgd)", fp);
22656 fputs ("(tlsldm)", fp);
22659 fputs ("(tlsldo)", fp);
22662 fputs ("(gottpoff)", fp);
22665 fputs ("(tpoff)", fp);
22668 gcc_unreachable ();
22676 fputs (" + (. - ", fp);
22677 output_addr_const (fp, XVECEXP (x, 0, 2));
22679 output_addr_const (fp, XVECEXP (x, 0, 3));
22689 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22692 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22694 gcc_assert (size == 4);
22695 fputs ("\t.word\t", file);
22696 output_addr_const (file, x);
22697 fputs ("(tlsldo)", file);
22700 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22703 arm_output_addr_const_extra (FILE *fp, rtx x)
22705 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22706 return arm_emit_tls_decoration (fp, x);
22707 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22710 int labelno = INTVAL (XVECEXP (x, 0, 0));
22712 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22713 assemble_name_raw (fp, label);
22717 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22719 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22723 output_addr_const (fp, XVECEXP (x, 0, 0));
22727 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22729 output_addr_const (fp, XVECEXP (x, 0, 0));
22733 output_addr_const (fp, XVECEXP (x, 0, 1));
22737 else if (GET_CODE (x) == CONST_VECTOR)
22738 return arm_emit_vector_const (fp, x);
22743 /* Output assembly for a shift instruction.
22744 SET_FLAGS determines how the instruction modifies the condition codes.
22745 0 - Do not set condition codes.
22746 1 - Set condition codes.
22747 2 - Use smallest instruction. */
22749 arm_output_shift (rtx * operands, int set_flags)
22752 static const char flag_chars[3] = {'?', '.', '!'};
22757 c = flag_chars[set_flags];
22758 if (TARGET_UNIFIED_ASM)
22760 shift = shift_op(operands[3], &val);
22764 operands[2] = GEN_INT(val);
22765 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22768 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22771 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22772 output_asm_insn (pattern, operands);
22776 /* Output a Thumb-1 casesi dispatch sequence. */
22778 thumb1_output_casesi (rtx *operands)
22780 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22782 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22784 switch (GET_MODE(diff_vec))
22787 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22788 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22790 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22791 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22793 return "bl\t%___gnu_thumb1_case_si";
22795 gcc_unreachable ();
22799 /* Output a Thumb-2 casesi instruction. */
22801 thumb2_output_casesi (rtx *operands)
22803 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22805 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22807 output_asm_insn ("cmp\t%0, %1", operands);
22808 output_asm_insn ("bhi\t%l3", operands);
22809 switch (GET_MODE(diff_vec))
22812 return "tbb\t[%|pc, %0]";
22814 return "tbh\t[%|pc, %0, lsl #1]";
22818 output_asm_insn ("adr\t%4, %l2", operands);
22819 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22820 output_asm_insn ("add\t%4, %4, %5", operands);
22825 output_asm_insn ("adr\t%4, %l2", operands);
22826 return "ldr\t%|pc, [%4, %0, lsl #2]";
22829 gcc_unreachable ();
22833 /* Most ARM cores are single issue, but some newer ones can dual issue.
22834 The scheduler descriptions rely on this being correct. */
22836 arm_issue_rate (void)
22853 /* A table and a function to perform ARM-specific name mangling for
22854 NEON vector types in order to conform to the AAPCS (see "Procedure
22855 Call Standard for the ARM Architecture", Appendix A). To qualify
22856 for emission with the mangled names defined in that document, a
22857 vector type must not only be of the correct mode but also be
22858 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22861 enum machine_mode mode;
22862 const char *element_type_name;
22863 const char *aapcs_name;
22864 } arm_mangle_map_entry;
22866 static arm_mangle_map_entry arm_mangle_map[] = {
22867 /* 64-bit containerized types. */
22868 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22869 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22870 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22871 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22872 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22873 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22874 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22875 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22876 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22877 /* 128-bit containerized types. */
22878 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22879 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22880 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22881 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22882 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22883 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22884 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22885 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22886 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22887 { VOIDmode, NULL, NULL }
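/* Consequently (illustrative): int8x8_t, a 64-bit vector of
   __builtin_neon_qi elements in V8QImode, mangles as "15__simd64_int8_t",
   so "void f (int8x8_t)" becomes "_Z1f15__simd64_int8_t" instead of using
   the generic vector mangling.  */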
22891 arm_mangle_type (const_tree type)
22893 arm_mangle_map_entry *pos = arm_mangle_map;
22895 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22896 has to be mangled as if it is in the "std" namespace.
22897 if (TARGET_AAPCS_BASED
22898 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22900 static bool warned;
22901 if (!warned && warn_psabi && !in_system_header)
22904 inform (input_location,
22905 "the mangling of %<va_list%> has changed in GCC 4.4");
22907 return "St9__va_list";
22910 /* Half-precision float. */
22911 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22914 if (TREE_CODE (type) != VECTOR_TYPE)
22917 /* Check the mode of the vector type, and the name of the vector
22918 element type, against the table. */
22919 while (pos->mode != VOIDmode)
22921 tree elt_type = TREE_TYPE (type);
22923 if (pos->mode == TYPE_MODE (type)
22924 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22925 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22926 pos->element_type_name))
22927 return pos->aapcs_name;
22932 /* Use the default mangling for unrecognized (possibly user-defined)
22933 vector types. */
22934 return NULL;
22937 /* Order of allocation of core registers for Thumb: this allocation is
22938 written over the corresponding initial entries of the array
22939 initialized with REG_ALLOC_ORDER. We allocate all low registers
22940 first. Saving and restoring a low register is usually cheaper than
22941 using a call-clobbered high register. */
22943 static const int thumb_core_reg_alloc_order[] =
22945 3, 2, 1, 0, 4, 5, 6, 7,
22946 14, 12, 8, 9, 10, 11, 13, 15
22949 /* Adjust register allocation order when compiling for Thumb. */
22952 arm_order_regs_for_local_alloc (void)
22954 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22955 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22957 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22958 sizeof (thumb_core_reg_alloc_order));
22961 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22964 arm_frame_pointer_required (void)
22966 return (cfun->has_nonlocal_label
22967 || SUBTARGET_FRAME_POINTER_REQUIRED
22968 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22971 /* Only thumb1 lacks conditional execution, so return true if
22972 the target is not thumb1. */
22974 arm_have_conditional_execution (void)
22976 return !TARGET_THUMB1;
22979 /* Legitimize a memory reference for a sync primitive implemented using
22980 ldrex / strex. We currently force the form of the reference to be
22981 indirect without offset. We do not yet support the indirect offset
22982 addressing supported by some ARM targets for these
22983 instructions. */
22985 arm_legitimize_sync_memory (rtx memory)
22987 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22988 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22990 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22991 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22992 return legitimate_memory;
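/* E.g. (a sketch): a reference such as (mem (plus (reg r4) (const_int 8)))
   has its address forced into a fresh pseudo first, leaving the bare
   (mem (reg)) form that ldrex/strex require.  */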
22995 /* An instruction emitter. */
22996 typedef void (* emit_f) (int label, const char *, rtx *);
22998 /* An instruction emitter that emits via the conventional
22999 output_asm_insn. */
23001 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23003 output_asm_insn (pattern, operands);
23006 /* Count the number of emitted synchronization instructions. */
23007 static unsigned arm_insn_count;
23009 /* An emitter that counts emitted instructions but does not actually
23010 emit instructions into the instruction stream.
23012 arm_count (int label,
23013 const char *pattern ATTRIBUTE_UNUSED,
23014 rtx *operands ATTRIBUTE_UNUSED)
23020 /* Construct a pattern using conventional output formatting and feed
23021 it to output_asm_insn. Provides a mechanism to construct the
23022 output pattern on the fly. Note the hard limit on the pattern
23023 buffer size. */
23024 static void ATTRIBUTE_PRINTF_4
23025 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23026 const char *pattern, ...)
23031 va_start (ap, pattern);
23032 vsprintf (buffer, pattern, ap);
23033 va_end (ap);
23034 emit (label, buffer, operands);
23037 /* Emit the memory barrier instruction, if any, provided by this
23038 target to a specified emitter. */
23040 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23042 if (TARGET_HAVE_DMB)
23044 /* Note we issue a system level barrier. We should consider
23045 issuing an inner shareability zone barrier here instead, i.e.
23047 emit (0, "dmb\tsy", operands);
23051 if (TARGET_HAVE_DMB_MCR)
23053 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23057 gcc_unreachable ();
23060 /* Emit the memory barrier instruction, if any, provided by this
23061 target. */
23063 arm_output_memory_barrier (rtx *operands)
23065 arm_process_output_memory_barrier (arm_emit, operands);
23069 /* Helper to figure out the instruction suffix required on ldrex/strex
23070 for operations on an object of the specified mode. */
23071 static const char *
23072 arm_ldrex_suffix (enum machine_mode mode)
23076 case QImode: return "b";
23077 case HImode: return "h";
23078 case SImode: return "";
23079 case DImode: return "d";
23081 gcc_unreachable ();
23086 /* Emit an ldrex{b,h,d} instruction appropriate for the specified
23087 mode. */
23089 arm_output_ldrex (emit_f emit,
23090 enum machine_mode mode,
23094 const char *suffix = arm_ldrex_suffix (mode);
23097 operands[0] = target;
23098 operands[1] = memory;
23099 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23102 /* Emit a strex{b,h,d} instruction appropriate for the specified
23103 mode. */
23105 arm_output_strex (emit_f emit,
23106 enum machine_mode mode,
23112 const char *suffix = arm_ldrex_suffix (mode);
23115 operands[0] = result;
23116 operands[1] = value;
23117 operands[2] = memory;
23118 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23122 /* Helper to emit a two operand instruction. */
23124 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23130 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23133 /* Helper to emit a three operand instruction. */
23135 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23142 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23145 /* Emit a load/store exclusive synchronization loop.
23147 do
23148 old_value = [mem]
23149 if old_value != required_value
23150 break;
23151 t1 = sync_op (old_value, new_value)
23152 [mem] = t1, t2 = [0|1]
23153 while ! t2
23155 Note:
23156 t1 == t2 is not permitted
23157 t1 == old_value is permitted
23159 required_value:
23161 RTX register or const_int representing the required old_value for
23162 the modify to continue, if NULL no comparison is performed. */
23164 arm_output_sync_loop (emit_f emit,
23165 enum machine_mode mode,
23168 rtx required_value,
23172 enum attr_sync_op sync_op,
23173 int early_barrier_required)
23177 gcc_assert (t1 != t2);
23179 if (early_barrier_required)
23180 arm_process_output_memory_barrier (emit, NULL);
23182 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23184 arm_output_ldrex (emit, mode, old_value, memory);
23186 if (required_value)
23190 operands[0] = old_value;
23191 operands[1] = required_value;
23192 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23193 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23199 arm_output_op3 (emit, "add", t1, old_value, new_value);
23203 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23207 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23211 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23215 arm_output_op3 (emit,"and", t1, old_value, new_value);
23219 arm_output_op3 (emit, "and", t1, old_value, new_value);
23220 arm_output_op2 (emit, "mvn", t1, t1);
23230 arm_output_strex (emit, mode, "", t2, t1, memory);
23232 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23233 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23234 LOCAL_LABEL_PREFIX);
23238 /* Use old_value for the return value because for some operations
23239 the old_value can easily be restored. This saves one register. */
23240 arm_output_strex (emit, mode, "", old_value, t1, memory);
23241 operands[0] = old_value;
23242 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23243 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23244 LOCAL_LABEL_PREFIX);
23249 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23253 arm_output_op3 (emit, "add", old_value, t1, new_value);
23257 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23261 arm_output_op2 (emit, "mov", old_value, required_value);
23265 gcc_unreachable ();
23269 arm_process_output_memory_barrier (emit, NULL);
23270 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
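/* Schematically (a sketch with arbitrary register choices and label
   number), the loop emitted for an SImode compare-and-swap looks like:

	dmb	sy		@ early barrier
   .LSYT7:
	ldrex	r0, [r1]	@ old_value = [mem]
	cmp	r0, r2		@ only when required_value is given
	bne	.LSYB7
	strex	r3, r4, [r1]	@ t2: 0 on success, 1 on failure
	teq	r3, #0
	bne	.LSYT7		@ retry if the reservation was lost
	dmb	sy		@ release barrier
   .LSYB7:  */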
23274 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23276 if (index > 0)
23277 default_value = operands[index - 1];
23279 return default_value;
23282 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23283 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23285 /* Extract the operands for a synchronization instruction from the
23286 instruction's attributes and emit the instruction. */
23288 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23290 rtx result, memory, required_value, new_value, t1, t2;
23291 int early_barrier;
23292 enum machine_mode mode;
23293 enum attr_sync_op sync_op;
23295 result = FETCH_SYNC_OPERAND(result, 0);
23296 memory = FETCH_SYNC_OPERAND(memory, 0);
23297 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23298 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23299 t1 = FETCH_SYNC_OPERAND(t1, 0);
23300 t2 = FETCH_SYNC_OPERAND(t2, 0);
23301 early_barrier =
23302 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23303 sync_op = get_attr_sync_op (insn);
23304 mode = GET_MODE (memory);
23306 arm_output_sync_loop (emit, mode, result, memory, required_value,
23307 new_value, t1, t2, sync_op, early_barrier);
23310 /* Emit a synchronization instruction loop. */
23312 arm_output_sync_insn (rtx insn, rtx *operands)
23314 arm_process_output_sync_insn (arm_emit, insn, operands);
23318 /* Count the number of machine instructions that will be emitted for a
23319 synchronization instruction. Note that the emitter used does not
23320 emit instructions, it just counts them, being careful not
23321 to count labels. */
23323 arm_sync_loop_insns (rtx insn, rtx *operands)
23325 arm_insn_count = 0;
23326 arm_process_output_sync_insn (arm_count, insn, operands);
23327 return arm_insn_count;
23330 /* Helper to call a target sync instruction generator, dealing with
23331 the variation in operands required by the different generators. */
23333 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23334 rtx memory, rtx required_value, rtx new_value)
23336 switch (generator->op)
23338 case arm_sync_generator_omn:
23339 gcc_assert (! required_value);
23340 return generator->u.omn (old_value, memory, new_value);
23342 case arm_sync_generator_omrn:
23343 gcc_assert (required_value);
23344 return generator->u.omrn (old_value, memory, required_value, new_value);
23350 /* Expand a synchronization loop. The synchronization loop is expanded
23351 as an opaque block of instructions in order to ensure that we do
23352 not subsequently get extraneous memory accesses inserted within the
23353 critical region. The exclusive access property of ldrex/strex is
23354 only guaranteed if there are no intervening memory accesses. */
23356 arm_expand_sync (enum machine_mode mode,
23357 struct arm_sync_generator *generator,
23358 rtx target, rtx memory, rtx required_value, rtx new_value)
23360 if (target == NULL)
23361 target = gen_reg_rtx (mode);
23363 memory = arm_legitimize_sync_memory (memory);
23364 if (mode != SImode)
23366 rtx load_temp = gen_reg_rtx (SImode);
23368 if (required_value)
23369 required_value = convert_modes (SImode, mode, required_value, true);
23371 new_value = convert_modes (SImode, mode, new_value, true);
23372 emit_insn (arm_call_generator (generator, load_temp, memory,
23373 required_value, new_value));
23374 emit_move_insn (target, gen_lowpart (mode, load_temp));
23378 emit_insn (arm_call_generator (generator, target, memory, required_value,
23379 new_value));
23383 static unsigned int
23384 arm_autovectorize_vector_sizes (void)
23386 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
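/* The returned value is a bitmask of vector sizes in bytes: 16 | 8 tells
   the vectorizer it may try both quadword (16-byte) and doubleword
   (8-byte) vectors, while 0 leaves only the preferred SIMD mode.  */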
23390 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23392 /* Vectors which aren't in packed structures will not be less aligned than
23393 the natural alignment of their element type, so this is safe. */
23394 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23397 return default_builtin_vector_alignment_reachable (type, is_packed);
23401 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23402 const_tree type, int misalignment,
23405 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23407 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23412 /* If the misalignment is unknown, we should be able to handle the access
23413 so long as it is not to a member of a packed data structure. */
23414 if (misalignment == -1)
23417 /* Return true if the misalignment is a multiple of the natural alignment
23418 of the vector's element type. This is probably always going to be
23419 true in practice, since we've already established that this isn't a
23420 packed access. */
23421 return ((misalignment % align) == 0);
23424 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23425 is_packed);
23429 arm_conditional_register_usage (void)
23433 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23435 for (regno = FIRST_FPA_REGNUM;
23436 regno <= LAST_FPA_REGNUM; ++regno)
23437 fixed_regs[regno] = call_used_regs[regno] = 1;
23440 if (TARGET_THUMB1 && optimize_size)
23442 /* When optimizing for size on Thumb-1, it's better not
23443 to use the HI regs, because of the overhead of
23444 stacking them. */
23445 for (regno = FIRST_HI_REGNUM;
23446 regno <= LAST_HI_REGNUM; ++regno)
23447 fixed_regs[regno] = call_used_regs[regno] = 1;
23450 /* The link register can be clobbered by any branch insn,
23451 but we have no way to track that at present, so mark
23452 it as unavailable. */
23454 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23456 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23458 if (TARGET_MAVERICK)
23460 for (regno = FIRST_FPA_REGNUM;
23461 regno <= LAST_FPA_REGNUM; ++ regno)
23462 fixed_regs[regno] = call_used_regs[regno] = 1;
23463 for (regno = FIRST_CIRRUS_FP_REGNUM;
23464 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23466 fixed_regs[regno] = 0;
23467 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23472 /* VFPv3 registers are disabled when earlier VFP
23473 versions are selected due to the definition of
23474 LAST_VFP_REGNUM. */
23475 for (regno = FIRST_VFP_REGNUM;
23476 regno <= LAST_VFP_REGNUM; ++ regno)
23478 fixed_regs[regno] = 0;
23479 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23480 || regno >= FIRST_VFP_REGNUM + 32;
23485 if (TARGET_REALLY_IWMMXT)
23487 regno = FIRST_IWMMXT_GR_REGNUM;
23488 /* The 2002/10/09 revision of the XScale ABI has wCG0
23489 and wCG1 as call-preserved registers. The 2002/11/21
23490 revision changed this so that all wCG registers are
23491 scratch registers. */
23492 for (regno = FIRST_IWMMXT_GR_REGNUM;
23493 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23494 fixed_regs[regno] = 0;
23495 /* The XScale ABI has wR0 - wR9 as scratch registers,
23496 the rest as call-preserved registers. */
23497 for (regno = FIRST_IWMMXT_REGNUM;
23498 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23500 fixed_regs[regno] = 0;
23501 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23505 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23507 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23508 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23510 else if (TARGET_APCS_STACK)
23512 fixed_regs[10] = 1;
23513 call_used_regs[10] = 1;
23515 /* -mcaller-super-interworking reserves r11 for calls to
23516 _interwork_r11_call_via_rN(). Making the register global
23517 is an easy way of ensuring that it remains valid for all
23518 calls. */
23519 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23520 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23522 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23523 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23524 if (TARGET_CALLER_INTERWORKING)
23525 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23527 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23531 arm_preferred_rename_class (reg_class_t rclass)
23533 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23534 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
23535 where possible, so that code size can be reduced. */
23536 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23542 /* Compute the attribute "length" of insn "*push_multi".
23543 So this function MUST be kept in sync with that insn pattern. */
23545 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
23547 int i, regno, hi_reg;
23548 int num_saves = XVECLEN (parallel_op, 0);
23555 regno = REGNO (first_op);
23556 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23557 for (i = 1; i < num_saves && !hi_reg; i++)
23559 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
23560 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
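/* Illustrative behaviour (a sketch): in Thumb-2, "push {r0-r7, lr}"
   contains no high register other than lr, so the 16-bit encoding applies
   and the length is 2, whereas "push {r0, r8}" needs the 32-bit encoding
   and a length of 4.  */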
23568 #include "gt-arm.h"