/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef  TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef  TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
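/* In concrete terms, anchored offsets then span [-4088, 4095]:
   4088 + 1 + 4095 = 8184 = 1023 * 8 bytes, so the block is a whole
   number of doublewords.  */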
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
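/* As a worked example, FL_FOR_ARCH5TE unfolds through the chain above to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
   | FL_THUMB.  */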
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options
   should be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */
/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
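/* Unified assembler syntax (UAL) spells the left shift "lsl"; the older
   divided syntax called the same operation "asl".  */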
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
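/* That is, the Thumb-2 work registers are the low registers r0-r7 (the
   0xff mask) minus the Thumb hard frame pointer and, when it is a low
   register, the PIC offset table register; the SP and PC bits lie above
   bit 7 and are cleared only for safety.  */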
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, l1_size, l1_line_size
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",             ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",            ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",            ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",        ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",             ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",       ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16",  ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",         ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",    ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",            ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",       ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",           ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",       ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",     ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",      ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",            ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
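/* For example, bit_count (0x29) == 3: the loop takes 101001 -> 101000
   -> 100000 -> 0, one iteration per set bit, so the cost is proportional
   to the number of set bits rather than to the word size.  */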
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
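  /* With the registrations above, an unsigned division such as

	 unsigned q = a / b;    =>  bl __aeabi_uidiv      (quotient in r0)
	 unsigned r = a % b;    =>  bl __aeabi_uidivmod   (remainder in r1)

     so plain division never needs a separate mod libcall.  */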
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
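/* The ABI-mandated tag is visible to C++ users: a function such as
   "void f (va_list)" mangles va_list as the class type __va_list
   (e.g. _Z1fSt9__va_list), so a different tag would break link
   compatibility with other AAPCS compilers.  */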
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);

	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
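  /* For example, "-mcpu=arm926ej-s -march=armv7-a" differ by more than
     tuning bits, so the warning above fires; armv7-a then drives code
     generation while arm926ej-s still supplies the default tuning.  */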
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}
      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
1427 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1428 if (!arm_selected_tune)
1429 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1431 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1432 insn_flags = arm_selected_cpu->flags;
1434 arm_tune = arm_selected_tune->core;
1435 tune_flags = arm_selected_tune->flags;
1436 current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
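  /* Checking the arithmetic: 248 + 1 + 4095 = 4344 = 543 * 8, so the
     Thumb-2 anchor block size is indeed a multiple of eight.  */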
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
	{
	  arm_fpu_desc = &all_fpus[i];
	  break;
	}
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }
1691 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1692 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1693 will ever exist. GCC makes no attempt to support this combination. */
1694 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1695 sorry ("iWMMXt and hardware floating point");
1697 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1698 if (TARGET_THUMB2 && TARGET_IWMMXT)
1699 sorry ("Thumb-2 iWMMXt");
1701 /* __fp16 support currently assumes the core has ldrh. */
1702 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1703 sorry ("__fp16 and no ldrh");
1705 /* If soft-float is specified then don't use the FPU. */
1706 if (TARGET_SOFT_FLOAT)
1707 arm_fpu_attr = FPU_NONE;
1709 if (TARGET_AAPCS_BASED)
1711 if (arm_abi == ARM_ABI_IWMMXT)
1712 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1713 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1714 && TARGET_HARD_FLOAT
1716 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1718 arm_pcs_default = ARM_PCS_AAPCS;
1722 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1723 sorry ("-mfloat-abi=hard and VFP");
1725 if (arm_abi == ARM_ABI_APCS)
1726 arm_pcs_default = ARM_PCS_APCS;
1728 arm_pcs_default = ARM_PCS_ATPCS;
1731 /* For arm2/3 there is no need to do any scheduling if there is only
1732 a floating point emulator, or we are doing software floating-point. */
1733 if ((TARGET_SOFT_FLOAT
1734 || (TARGET_FPA && arm_fpu_desc->rev))
1735 && (tune_flags & FL_MODE32) == 0)
1736 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1738 if (target_thread_switch)
1740 if (strcmp (target_thread_switch, "soft") == 0)
1741 target_thread_pointer = TP_SOFT;
1742 else if (strcmp (target_thread_switch, "auto") == 0)
1743 target_thread_pointer = TP_AUTO;
1744 else if (strcmp (target_thread_switch, "cp15") == 0)
1745 target_thread_pointer = TP_CP15;
1747 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1750 /* Use the cp15 method if it is available. */
1751 if (target_thread_pointer == TP_AUTO)
1753 if (arm_arch6k && !TARGET_THUMB1)
1754 target_thread_pointer = TP_CP15;
1756 target_thread_pointer = TP_SOFT;
1759 if (TARGET_HARD_TP && TARGET_THUMB1)
1760 error ("cannot use -mtp=cp15 with 16-bit Thumb");
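/* For reference, the cp15 method reads the user read-only thread ID
   register (TPIDRURO) with a coprocessor move:

	mrc	p15, 0, r0, c13, c0, 3

   Thumb-1 has no coprocessor instructions, hence the error above; the
   soft method calls the __aeabi_read_tp helper instead.  */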
1762 /* Override the default structure alignment for the AAPCS ABI. */
1763 if (TARGET_AAPCS_BASED)
1764 arm_structure_size_boundary = 8;
1766 if (structure_size_string != NULL)
1768 int size = strtol (structure_size_string, NULL, 0);
1770 if (size == 8 || size == 32
1771 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1772 arm_structure_size_boundary = size;
1774 warning (0, "structure size boundary can only be set to %s",
1775 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
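/* To illustrate the effect of this option: with a boundary of 32, a
   structure such as

	struct s { char c; };

   is padded to a full word (sizeof yields 4 rather than 1), so objects
   compiled with different -mstructure-size-boundary settings are not
   binary compatible.  */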
1778 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1780 error ("RTP PIC is incompatible with Thumb");
1784 /* If stack checking is disabled, we can use r10 as the PIC register,
1785 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1786 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1788 if (TARGET_VXWORKS_RTP)
1789 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1790 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1793 if (flag_pic && TARGET_VXWORKS_RTP)
1794 arm_pic_register = 9;
1796 if (arm_pic_register_string != NULL)
1798 int pic_register = decode_reg_name (arm_pic_register_string);
1801 warning (0, "-mpic-register= is useless without -fpic");
1803 /* Prevent the user from choosing an obviously stupid PIC register. */
1804 else if (pic_register < 0 || call_used_regs[pic_register]
1805 || pic_register == HARD_FRAME_POINTER_REGNUM
1806 || pic_register == STACK_POINTER_REGNUM
1807 || pic_register >= PC_REGNUM
1808 || (TARGET_VXWORKS_RTP
1809 && (unsigned int) pic_register != arm_pic_register))
1810 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1812 arm_pic_register = pic_register;
1815 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1816 if (fix_cm3_ldrd == 2)
1818 if (arm_selected_cpu->core == cortexm3)
1824 if (TARGET_THUMB1 && flag_schedule_insns)
1826 /* Don't warn since it's on by default in -O2. */
1827 flag_schedule_insns = 0;
1832 /* If optimizing for size, bump the number of instructions that we
1833 are prepared to conditionally execute (even on a StrongARM). */
1834 max_insns_skipped = 6;
1838 /* StrongARM has early execution of branches, so a sequence
1839 that is worth skipping is shorter. */
1840 if (arm_tune_strongarm)
1841 max_insns_skipped = 3;
1844 /* Hot/Cold partitioning is not currently supported, since we can't
1845 handle literal pool placement in that case. */
1846 if (flag_reorder_blocks_and_partition)
1848 inform (input_location,
1849 "-freorder-blocks-and-partition not supported on this architecture");
1850 flag_reorder_blocks_and_partition = 0;
1851 flag_reorder_blocks = 1;
1855 /* Hoisting PIC address calculations more aggressively provides a small,
1856 but measurable, size reduction for PIC code. Therefore, we decrease
1857 the bar for unrestricted expression hoisting to the cost of PIC address
1858 calculation, which is 2 instructions. */
1859 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1860 global_options.x_param_values,
1861 global_options_set.x_param_values);
1863 /* ARM EABI defaults to strict volatile bitfields. */
1864 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1865 flag_strict_volatile_bitfields = 1;
1867 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
1868 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1869 if (flag_prefetch_loop_arrays < 0
1872 && current_tune->num_prefetch_slots > 0)
1873 flag_prefetch_loop_arrays = 1;
1875 /* Set up parameters to be used in the prefetching algorithm. Do not override the
1876 defaults unless we are tuning for a core we have researched values for. */
1877 if (current_tune->num_prefetch_slots > 0)
1878 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1879 current_tune->num_prefetch_slots,
1880 global_options.x_param_values,
1881 global_options_set.x_param_values);
1882 if (current_tune->l1_cache_line_size >= 0)
1883 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1884 current_tune->l1_cache_line_size,
1885 global_options.x_param_values,
1886 global_options_set.x_param_values);
1887 if (current_tune->l1_cache_size >= 0)
1888 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1889 current_tune->l1_cache_size,
1890 global_options.x_param_values,
1891 global_options_set.x_param_values);
1893 /* Register global variables with the garbage collector. */
1894 arm_add_gc_roots ();
1898 arm_add_gc_roots (void)
1900 gcc_obstack_init(&minipool_obstack);
1901 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1904 /* A table of known ARM exception types.
1905 For use with the interrupt function attribute. */
1909 const char *const arg;
1910 const unsigned long return_value;
1914 static const isr_attribute_arg isr_attribute_args [] =
1916 { "IRQ", ARM_FT_ISR },
1917 { "irq", ARM_FT_ISR },
1918 { "FIQ", ARM_FT_FIQ },
1919 { "fiq", ARM_FT_FIQ },
1920 { "ABORT", ARM_FT_ISR },
1921 { "abort", ARM_FT_ISR },
1922 { "ABORT", ARM_FT_ISR },
1923 { "abort", ARM_FT_ISR },
1924 { "UNDEF", ARM_FT_EXCEPTION },
1925 { "undef", ARM_FT_EXCEPTION },
1926 { "SWI", ARM_FT_EXCEPTION },
1927 { "swi", ARM_FT_EXCEPTION },
1928 { NULL, ARM_FT_NORMAL }
1931 /* Returns the (interrupt) function type of the current
1932 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1934 static unsigned long
1935 arm_isr_value (tree argument)
1937 const isr_attribute_arg * ptr;
1941 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1943 /* No argument - default to IRQ. */
1944 if (argument == NULL_TREE)
1947 /* Get the value of the argument. */
1948 if (TREE_VALUE (argument) == NULL_TREE
1949 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1950 return ARM_FT_UNKNOWN;
1952 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1954 /* Check it against the list of known arguments. */
1955 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1956 if (streq (arg, ptr->arg))
1957 return ptr->return_value;
1959 /* An unrecognized interrupt type. */
1960 return ARM_FT_UNKNOWN;
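/* A minimal usage sketch: the strings in the table above are what users
   write in the attribute, e.g.

	void handler (void) __attribute__ ((interrupt ("IRQ")));

   for which this function returns ARM_FT_ISR.  */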
1963 /* Computes the type of the current function. */
1965 static unsigned long
1966 arm_compute_func_type (void)
1968 unsigned long type = ARM_FT_UNKNOWN;
1972 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1974 /* Decide if the current function is volatile. Such functions
1975 never return, and many memory cycles can be saved by not storing
1976 register values that will never be needed again. This optimization
1977 was added to speed up context switching in a kernel application. */
1979 && (TREE_NOTHROW (current_function_decl)
1980 || !(flag_unwind_tables
1982 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1983 && TREE_THIS_VOLATILE (current_function_decl))
1984 type |= ARM_FT_VOLATILE;
1986 if (cfun->static_chain_decl != NULL)
1987 type |= ARM_FT_NESTED;
1989 attr = DECL_ATTRIBUTES (current_function_decl);
1991 a = lookup_attribute ("naked", attr);
1993 type |= ARM_FT_NAKED;
1995 a = lookup_attribute ("isr", attr);
1997 a = lookup_attribute ("interrupt", attr);
2000 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2002 type |= arm_isr_value (TREE_VALUE (a));
2007 /* Returns the type of the current function. */
2010 arm_current_func_type (void)
2012 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2013 cfun->machine->func_type = arm_compute_func_type ();
2015 return cfun->machine->func_type;
2019 arm_allocate_stack_slots_for_args (void)
2021 /* Naked functions should not allocate stack slots for arguments. */
2022 return !IS_NAKED (arm_current_func_type ());
2026 /* Output assembler code for a block containing the constant parts
2027 of a trampoline, leaving space for the variable parts.
2029 On the ARM (if r8 is the static chain regnum, and remembering that
2030 referencing pc adds an offset of 8), the trampoline looks like:
2033 .word static chain value
2034 .word function's address
2035 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2038 arm_asm_trampoline_template (FILE *f)
2042 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2043 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2045 else if (TARGET_THUMB2)
2047 /* The Thumb-2 trampoline is similar to the ARM implementation.
2048 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2049 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2050 STATIC_CHAIN_REGNUM, PC_REGNUM);
2051 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2055 ASM_OUTPUT_ALIGN (f, 2);
2056 fprintf (f, "\t.code\t16\n");
2057 fprintf (f, ".Ltrampoline_start:\n");
2058 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2059 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2060 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2061 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2062 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2063 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2065 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2066 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
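/* Trampolines built from the template above are materialized on the
   stack when the address of a GNU C nested function escapes, e.g.:

	int outer (int x)
	{
	  int inner (int y) { return x + y; }
	  return call_it (inner);
	}

   Taking the address of `inner' forces a trampoline that loads the
   static chain (the frame holding X) before jumping to the real code;
   `call_it' here stands for any function taking a function pointer.  */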
2069 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2072 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2074 rtx fnaddr, mem, a_tramp;
2076 emit_block_move (m_tramp, assemble_trampoline_template (),
2077 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2079 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2080 emit_move_insn (mem, chain_value);
2082 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2083 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2084 emit_move_insn (mem, fnaddr);
2086 a_tramp = XEXP (m_tramp, 0);
2087 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2088 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2089 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2092 /* Thumb trampolines should be entered in Thumb mode, so set
2093 the bottom bit of the address. */
2096 arm_trampoline_adjust_address (rtx addr)
2099 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2100 NULL, 0, OPTAB_LIB_WIDEN);
2104 /* Return 1 if it is possible to return using a single instruction.
2105 If SIBLING is non-null, this is a test for a return before a sibling
2106 call. SIBLING is the call insn, so we can examine its register usage. */
2109 use_return_insn (int iscond, rtx sibling)
2112 unsigned int func_type;
2113 unsigned long saved_int_regs;
2114 unsigned HOST_WIDE_INT stack_adjust;
2115 arm_stack_offsets *offsets;
2117 /* Never use a return instruction before reload has run. */
2118 if (!reload_completed)
2121 func_type = arm_current_func_type ();
2123 /* Naked, volatile and stack alignment functions need special
2125 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2128 /* So do interrupt functions that use the frame pointer and Thumb
2129 interrupt functions. */
2130 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2133 offsets = arm_get_frame_offsets ();
2134 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2136 /* As do variadic functions. */
2137 if (crtl->args.pretend_args_size
2138 || cfun->machine->uses_anonymous_args
2139 /* Or if the function calls __builtin_eh_return () */
2140 || crtl->calls_eh_return
2141 /* Or if the function calls alloca */
2142 || cfun->calls_alloca
2143 /* Or if there is a stack adjustment. However, if the stack pointer
2144 is saved on the stack, we can use a pre-incrementing stack load. */
2145 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2146 && stack_adjust == 4)))
2149 saved_int_regs = offsets->saved_regs_mask;
2151 /* Unfortunately, the insn
2153 ldmib sp, {..., sp, ...}
2155 triggers a bug on most SA-110 based devices, such that the stack
2156 pointer won't be correctly restored if the instruction takes a
2157 page fault. We work around this problem by popping r3 along with
2158 the other registers, since that is never slower than executing
2159 another instruction.
2161 We test for !arm_arch5 here, because code for any architecture
2162 less than this could potentially be run on one of the buggy
2164 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2166 /* Validate that r3 is a call-clobbered register (always true in
2167 the default ABI) ... */
2168 if (!call_used_regs[3])
2171 /* ... that it isn't being used for a return value ... */
2172 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2175 /* ... or for a tail-call argument ... */
2178 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2180 if (find_regno_fusage (sibling, USE, 3))
2184 /* ... and that there are no call-saved registers in r0-r2
2185 (always true in the default ABI). */
2186 if (saved_int_regs & 0x7)
2190 /* Can't be done if interworking with Thumb, and any registers have been
2192 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2195 /* On StrongARM, conditional returns are expensive if they aren't
2196 taken and multiple registers have been stacked. */
2197 if (iscond && arm_tune_strongarm)
2199 /* Conditional return when just the LR is stored is a simple
2200 conditional-load instruction; that's not expensive. */
2201 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2205 && arm_pic_register != INVALID_REGNUM
2206 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2210 /* If there are saved registers but the LR isn't saved, then we need
2211 two instructions for the return. */
2212 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2215 /* Can't be done if any of the FPA regs are pushed,
2216 since this also requires an insn. */
2217 if (TARGET_HARD_FLOAT && TARGET_FPA)
2218 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2219 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2222 /* Likewise VFP regs. */
2223 if (TARGET_HARD_FLOAT && TARGET_VFP)
2224 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2225 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2228 if (TARGET_REALLY_IWMMXT)
2229 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2230 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2236 /* Return TRUE if int I is a valid immediate ARM constant. */
2239 const_ok_for_arm (HOST_WIDE_INT i)
2243 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2244 be all zero, or all one. */
2245 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2246 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2247 != ((~(unsigned HOST_WIDE_INT) 0)
2248 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2251 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2253 /* Fast return for 0 and small values. We must do this for zero, since
2254 the code below can't handle that one case. */
2255 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2258 /* Get the number of trailing zeros. */
2259 lowbit = ffs((int) i) - 1;
2261 /* Only even shifts are allowed in ARM mode so round down to the
2262 nearest even number. */
2266 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2271 /* Allow rotated constants in ARM mode. */
2273 && ((i & ~0xc000003f) == 0
2274 || (i & ~0xf000000f) == 0
2275 || (i & ~0xfc000003) == 0))
2282 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2285 if (i == v || i == (v | (v << 8)))
2288 /* Allow repeated pattern 0xXY00XY00. */
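/* In essence, the tests above accept (for ARM state) any value that is
   an 8-bit constant rotated right by an even amount.  An equivalent,
   self-contained checker, given here purely as an illustrative sketch
   and not used by the compiler, is:

	static int
	rotated_imm8_p (unsigned int x)
	{
	  int r;

	  for (r = 0; r < 32; r += 2)
	    {
	      unsigned int v = r ? ((x << r) | (x >> (32 - r))) : x;

	      if (v <= 0xff)
		return 1;
	    }
	  return 0;
	}

   For example 0xff000000 (0xff ror 8) and 0x3fc (0xff ror 30) are
   accepted, while 0x101 is not.  */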
2298 /* Return true if I is a valid constant for the operation CODE. */
2300 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2302 if (const_ok_for_arm (i))
2326 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2328 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2334 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2338 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
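/* For example, an AND with 0xffffff00 is not directly encodable, but
   the complement 0xff is, so the operation can be emitted as

	bic	r0, r1, #0xff

   and, likewise, a PLUS of -5 can be emitted as a SUB of #5.  */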
2345 /* Emit a sequence of insns to handle a large constant.
2346 CODE is the code of the operation required; it can be any of SET, PLUS,
2347 IOR, AND, XOR, MINUS;
2348 MODE is the mode in which the operation is being performed;
2349 VAL is the integer to operate on;
2350 SOURCE is the other operand (a register, or a null-pointer for SET);
2351 SUBTARGETS means it is safe to create scratch registers if that will
2352 either produce a simpler sequence, or we will want to cse the values.
2353 Return value is the number of insns emitted. */
2355 /* ??? Tweak this for thumb2. */
2357 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2358 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2362 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2363 cond = COND_EXEC_TEST (PATTERN (insn));
2367 if (subtargets || code == SET
2368 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2369 && REGNO (target) != REGNO (source)))
2371 /* After arm_reorg has been called, we can't fix up expensive
2372 constants by pushing them into memory so we must synthesize
2373 them in-line, regardless of the cost. This is only likely to
2374 be more costly on chips that have load delay slots and we are
2375 compiling without running the scheduler (so no splitting
2376 occurred before the final instruction emission).
2378 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2380 if (!after_arm_reorg
2382 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2384 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2389 /* Currently SET is the only monadic value for CODE; all
2390 the rest are dyadic. */
2391 if (TARGET_USE_MOVT)
2392 arm_emit_movpair (target, GEN_INT (val));
2394 emit_set_insn (target, GEN_INT (val));
2400 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2402 if (TARGET_USE_MOVT)
2403 arm_emit_movpair (temp, GEN_INT (val));
2405 emit_set_insn (temp, GEN_INT (val));
2407 /* For MINUS, the value is the operand we subtract from, since we
2408 never have subtraction of a constant. */
2410 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2412 emit_set_insn (target,
2413 gen_rtx_fmt_ee (code, mode, source, temp));
2419 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2423 /* Return the number of instructions required to synthesize the given
2424 constant, if we start emitting them from bit-position I. */
2426 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2428 HOST_WIDE_INT temp1;
2429 int step_size = TARGET_ARM ? 2 : 1;
2432 gcc_assert (TARGET_ARM || i == 0);
2440 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2445 temp1 = remainder & ((0x0ff << end)
2446 | ((i < end) ? (0xff >> (32 - end)) : 0));
2447 remainder &= ~temp1;
2452 } while (remainder);
2457 find_best_start (unsigned HOST_WIDE_INT remainder)
2459 int best_consecutive_zeros = 0;
2463 /* If we aren't targeting ARM, the best place to start is always at
2468 for (i = 0; i < 32; i += 2)
2470 int consecutive_zeros = 0;
2472 if (!(remainder & (3 << i)))
2474 while ((i < 32) && !(remainder & (3 << i)))
2476 consecutive_zeros += 2;
2479 if (consecutive_zeros > best_consecutive_zeros)
2481 best_consecutive_zeros = consecutive_zeros;
2482 best_start = i - consecutive_zeros;
2488 /* So long as it won't require any more insns to do so, it's
2489 desirable to emit a small constant (in bits 0...9) in the last
2490 insn. This way there is more chance that it can be combined with
2491 a later addressing insn to form a pre-indexed load or store
2492 operation. Consider:
2494 *((volatile int *)0xe0000100) = 1;
2495 *((volatile int *)0xe0000110) = 2;
2497 We want this to wind up as:
2501 str rB, [rA, #0x100]
2503 str rB, [rA, #0x110]
2505 rather than having to synthesize both large constants from scratch.
2507 Therefore, we calculate how many insns would be required to emit
2508 the constant starting from `best_start', and also starting from
2509 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2510 yield a shorter sequence, we may as well use zero. */
2512 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2513 && (count_insns_for_constant (remainder, 0) <=
2514 count_insns_for_constant (remainder, best_start)))
2520 /* Emit an instruction with the indicated PATTERN. If COND is
2521 non-NULL, conditionalize the execution of the instruction on COND
2525 emit_constant_insn (rtx cond, rtx pattern)
2528 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2529 emit_insn (pattern);
2532 /* As above, but extra parameter GENERATE which, if clear, suppresses
2534 /* ??? This needs more work for thumb2. */
2537 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2538 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2543 int final_invert = 0;
2544 int can_negate_initial = 0;
2546 int num_bits_set = 0;
2547 int set_sign_bit_copies = 0;
2548 int clear_sign_bit_copies = 0;
2549 int clear_zero_bit_copies = 0;
2550 int set_zero_bit_copies = 0;
2552 unsigned HOST_WIDE_INT temp1, temp2;
2553 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2554 int step_size = TARGET_ARM ? 2 : 1;
2556 /* Find out which operations are safe for a given CODE. Also do a quick
2557 check for degenerate cases; these can occur when DImode operations
2568 can_negate_initial = 1;
2572 if (remainder == 0xffffffff)
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target,
2577 GEN_INT (ARM_SIGN_EXTEND (val))));
2583 if (reload_completed && rtx_equal_p (target, source))
2587 emit_constant_insn (cond,
2588 gen_rtx_SET (VOIDmode, target, source));
2600 emit_constant_insn (cond,
2601 gen_rtx_SET (VOIDmode, target, const0_rtx));
2604 if (remainder == 0xffffffff)
2606 if (reload_completed && rtx_equal_p (target, source))
2609 emit_constant_insn (cond,
2610 gen_rtx_SET (VOIDmode, target, source));
2619 if (reload_completed && rtx_equal_p (target, source))
2622 emit_constant_insn (cond,
2623 gen_rtx_SET (VOIDmode, target, source));
2627 if (remainder == 0xffffffff)
2630 emit_constant_insn (cond,
2631 gen_rtx_SET (VOIDmode, target,
2632 gen_rtx_NOT (mode, source)));
2638 /* We treat MINUS as (val - source), since (source - val) is always
2639 passed as (source + (-val)). */
2643 emit_constant_insn (cond,
2644 gen_rtx_SET (VOIDmode, target,
2645 gen_rtx_NEG (mode, source)));
2648 if (const_ok_for_arm (val))
2651 emit_constant_insn (cond,
2652 gen_rtx_SET (VOIDmode, target,
2653 gen_rtx_MINUS (mode, GEN_INT (val),
2665 /* If we can do it in one insn get out quickly. */
2666 if (const_ok_for_arm (val)
2667 || (can_negate_initial && const_ok_for_arm (-val))
2668 || (can_invert && const_ok_for_arm (~val)))
2671 emit_constant_insn (cond,
2672 gen_rtx_SET (VOIDmode, target,
2674 ? gen_rtx_fmt_ee (code, mode, source,
2680 /* Calculate a few attributes that may be useful for specific
2682 /* Count number of leading zeros. */
2683 for (i = 31; i >= 0; i--)
2685 if ((remainder & (1 << i)) == 0)
2686 clear_sign_bit_copies++;
2691 /* Count number of leading 1's. */
2692 for (i = 31; i >= 0; i--)
2694 if ((remainder & (1 << i)) != 0)
2695 set_sign_bit_copies++;
2700 /* Count number of trailing zeros. */
2701 for (i = 0; i <= 31; i++)
2703 if ((remainder & (1 << i)) == 0)
2704 clear_zero_bit_copies++;
2709 /* Count number of trailing 1's. */
2710 for (i = 0; i <= 31; i++)
2712 if ((remainder & (1 << i)) != 0)
2713 set_zero_bit_copies++;
2721 /* See if we can use movw. */
2722 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2725 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2730 /* See if we can do this by sign_extending a constant that is known
2731 to be negative. This is a good way of doing it, since the shift
2732 may well merge into a subsequent insn. */
2733 if (set_sign_bit_copies > 1)
2735 if (const_ok_for_arm
2736 (temp1 = ARM_SIGN_EXTEND (remainder
2737 << (set_sign_bit_copies - 1))))
2741 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2742 emit_constant_insn (cond,
2743 gen_rtx_SET (VOIDmode, new_src,
2745 emit_constant_insn (cond,
2746 gen_ashrsi3 (target, new_src,
2747 GEN_INT (set_sign_bit_copies - 1)));
2751 /* For an inverted constant, we will need to set the low bits;
2752 these will be shifted out of harm's way. */
2753 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2754 if (const_ok_for_arm (~temp1))
2758 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2759 emit_constant_insn (cond,
2760 gen_rtx_SET (VOIDmode, new_src,
2762 emit_constant_insn (cond,
2763 gen_ashrsi3 (target, new_src,
2764 GEN_INT (set_sign_bit_copies - 1)));
2770 /* See if we can calculate the value as the difference between two
2771 valid immediates. */
2772 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2774 int topshift = clear_sign_bit_copies & ~1;
2776 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2777 & (0xff000000 >> topshift));
2779 /* If temp1 is zero, then that means the 9 most significant
2780 bits of remainder were 1 and we've caused it to overflow.
2781 When topshift is 0 we don't need to do anything since we
2782 can borrow from 'bit 32'. */
2783 if (temp1 == 0 && topshift != 0)
2784 temp1 = 0x80000000 >> (topshift - 1);
2786 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2788 if (const_ok_for_arm (temp2))
2792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2793 emit_constant_insn (cond,
2794 gen_rtx_SET (VOIDmode, new_src,
2796 emit_constant_insn (cond,
2797 gen_addsi3 (target, new_src,
2805 /* See if we can generate this by setting the bottom (or the top)
2806 16 bits, and then shifting these into the other half of the
2807 word. We only look for the simplest cases; to do more would cost
2808 too much. Be careful, however, not to generate this when the
2809 alternative would take fewer insns. */
2810 if (val & 0xffff0000)
2812 temp1 = remainder & 0xffff0000;
2813 temp2 = remainder & 0x0000ffff;
2815 /* Overlaps outside this range are best done using other methods. */
2816 for (i = 9; i < 24; i++)
2818 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2819 && !const_ok_for_arm (temp2))
2821 rtx new_src = (subtargets
2822 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2824 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2825 source, subtargets, generate);
2833 gen_rtx_ASHIFT (mode, source,
2840 /* Don't duplicate cases already considered. */
2841 for (i = 17; i < 24; i++)
2843 if (((temp1 | (temp1 >> i)) == remainder)
2844 && !const_ok_for_arm (temp1))
2846 rtx new_src = (subtargets
2847 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2849 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2850 source, subtargets, generate);
2855 gen_rtx_SET (VOIDmode, target,
2858 gen_rtx_LSHIFTRT (mode, source,
2869 /* If we have IOR or XOR, and the constant can be loaded in a
2870 single instruction, and we can find a temporary to put it in,
2871 then this can be done in two instructions instead of 3-4. */
2873 /* TARGET can't be NULL if SUBTARGETS is 0 */
2874 || (reload_completed && !reg_mentioned_p (target, source)))
2876 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2880 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2882 emit_constant_insn (cond,
2883 gen_rtx_SET (VOIDmode, sub,
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target,
2887 gen_rtx_fmt_ee (code, mode,
2898 x = y | constant (which is composed of set_sign_bit_copies leading 1s
2899 and the remainder 0s, e.g. 0xfff00000)
2900 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2902 This can be done in 2 instructions by using shifts with mov or mvn.
2907 mvn r0, r0, lsr #12 */
2908 if (set_sign_bit_copies > 8
2909 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2913 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2914 rtx shift = GEN_INT (set_sign_bit_copies);
2918 gen_rtx_SET (VOIDmode, sub,
2920 gen_rtx_ASHIFT (mode,
2925 gen_rtx_SET (VOIDmode, target,
2927 gen_rtx_LSHIFTRT (mode, sub,
2934 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2936 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2938 E.g. r0 = r0 | 0xfff
2943 if (set_zero_bit_copies > 8
2944 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2948 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2949 rtx shift = GEN_INT (set_zero_bit_copies);
2953 gen_rtx_SET (VOIDmode, sub,
2955 gen_rtx_LSHIFTRT (mode,
2960 gen_rtx_SET (VOIDmode, target,
2962 gen_rtx_ASHIFT (mode, sub,
2968 /* This will never be reached for Thumb-2 because orn is a valid
2969 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2971 x = y | constant (such that ~constant is a valid constant)
2973 x = ~(~y & ~constant).
2975 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2979 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2980 emit_constant_insn (cond,
2981 gen_rtx_SET (VOIDmode, sub,
2982 gen_rtx_NOT (mode, source)));
2985 sub = gen_reg_rtx (mode);
2986 emit_constant_insn (cond,
2987 gen_rtx_SET (VOIDmode, sub,
2988 gen_rtx_AND (mode, source,
2990 emit_constant_insn (cond,
2991 gen_rtx_SET (VOIDmode, target,
2992 gen_rtx_NOT (mode, sub)));
2999 /* See if two shifts will do 2 or more insns' worth of work. */
3000 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3002 HOST_WIDE_INT shift_mask = ((0xffffffff
3003 << (32 - clear_sign_bit_copies))
3006 if ((remainder | shift_mask) != 0xffffffff)
3010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3011 insns = arm_gen_constant (AND, mode, cond,
3012 remainder | shift_mask,
3013 new_src, source, subtargets, 1);
3018 rtx targ = subtargets ? NULL_RTX : target;
3019 insns = arm_gen_constant (AND, mode, cond,
3020 remainder | shift_mask,
3021 targ, source, subtargets, 0);
3027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3028 rtx shift = GEN_INT (clear_sign_bit_copies);
3030 emit_insn (gen_ashlsi3 (new_src, source, shift));
3031 emit_insn (gen_lshrsi3 (target, new_src, shift));
3037 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3039 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3041 if ((remainder | shift_mask) != 0xffffffff)
3045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047 insns = arm_gen_constant (AND, mode, cond,
3048 remainder | shift_mask,
3049 new_src, source, subtargets, 1);
3054 rtx targ = subtargets ? NULL_RTX : target;
3056 insns = arm_gen_constant (AND, mode, cond,
3057 remainder | shift_mask,
3058 targ, source, subtargets, 0);
3064 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3065 rtx shift = GEN_INT (clear_zero_bit_copies);
3067 emit_insn (gen_lshrsi3 (new_src, source, shift));
3068 emit_insn (gen_ashlsi3 (target, new_src, shift));
3080 for (i = 0; i < 32; i++)
3081 if (remainder & (1 << i))
3085 || (code != IOR && can_invert && num_bits_set > 16))
3086 remainder ^= 0xffffffff;
3087 else if (code == PLUS && num_bits_set > 16)
3088 remainder = (-remainder) & 0xffffffff;
3090 /* For XOR, if more than half the bits are set and there's a sequence
3091 of more than 8 consecutive ones in the pattern then we can XOR by the
3092 inverted constant and then invert the final result; this may save an
3093 instruction and might also lead to the final mvn being merged with
3094 some other operation. */
3095 else if (code == XOR && num_bits_set > 16
3096 && (count_insns_for_constant (remainder ^ 0xffffffff,
3098 (remainder ^ 0xffffffff))
3099 < count_insns_for_constant (remainder,
3100 find_best_start (remainder))))
3102 remainder ^= 0xffffffff;
3111 /* Now try to find a way of doing the job in either two or three
3113 We start by looking for the largest block of zeros that are aligned on
3114 a 2-bit boundary; we then fill up the temps, wrapping around to the
3115 top of the word when we drop off the bottom.
3116 In the worst case this code should produce no more than four insns.
3117 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3118 best place to start. */
3120 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3123 /* Now start emitting the insns. */
3124 i = find_best_start (remainder);
3131 if (remainder & (3 << (i - 2)))
3136 temp1 = remainder & ((0x0ff << end)
3137 | ((i < end) ? (0xff >> (32 - end)) : 0));
3138 remainder &= ~temp1;
3142 rtx new_src, temp1_rtx;
3144 if (code == SET || code == MINUS)
3146 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3147 if (can_invert && code != MINUS)
3152 if ((final_invert || remainder) && subtargets)
3153 new_src = gen_reg_rtx (mode);
3158 else if (can_negate)
3162 temp1 = trunc_int_for_mode (temp1, mode);
3163 temp1_rtx = GEN_INT (temp1);
3167 else if (code == MINUS)
3168 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3170 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3172 emit_constant_insn (cond,
3173 gen_rtx_SET (VOIDmode, new_src,
3183 else if (code == MINUS)
3189 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3199 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3200 gen_rtx_NOT (mode, source)));
3207 /* Canonicalize a comparison so that we are more likely to recognize it.
3208 This can be done for a few constant compares, where we can make the
3209 immediate value easier to load. */
3212 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3214 enum machine_mode mode;
3215 unsigned HOST_WIDE_INT i, maxval;
3217 mode = GET_MODE (*op0);
3218 if (mode == VOIDmode)
3219 mode = GET_MODE (*op1);
3221 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3223 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3224 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3225 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3226 for GTU/LEU in Thumb mode. */
3231 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3233 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3236 if (code == GT || code == LE
3237 || (!TARGET_ARM && (code == GTU || code == LEU)))
3239 /* Missing comparison. First try to use an available
3241 if (GET_CODE (*op1) == CONST_INT)
3249 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3251 *op1 = GEN_INT (i + 1);
3252 return code == GT ? GE : LT;
3257 if (i != ~((unsigned HOST_WIDE_INT) 0)
3258 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3260 *op1 = GEN_INT (i + 1);
3261 return code == GTU ? GEU : LTU;
3269 /* If that did not work, reverse the condition. */
3273 return swap_condition (code);
3279 /* Comparisons smaller than DImode. Only adjust comparisons against
3280 an out-of-range constant. */
3281 if (GET_CODE (*op1) != CONST_INT
3282 || const_ok_for_arm (INTVAL (*op1))
3283 || const_ok_for_arm (- INTVAL (*op1)))
3297 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3299 *op1 = GEN_INT (i + 1);
3300 return code == GT ? GE : LT;
3307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3309 *op1 = GEN_INT (i - 1);
3310 return code == GE ? GT : LE;
3316 if (i != ~((unsigned HOST_WIDE_INT) 0)
3317 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3319 *op1 = GEN_INT (i + 1);
3320 return code == GTU ? GEU : LTU;
3327 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3329 *op1 = GEN_INT (i - 1);
3330 return code == GEU ? GTU : LEU;
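/* As a concrete example, (GT x #0xfff) cannot be emitted directly,
   since 0xfff is not a valid ARM immediate; rewriting it as
   (GE x #0x1000) gives

	cmp	r0, #4096

   whose immediate the comparison instruction can encode.  */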
3342 /* Define how to find the value returned by a function. */
3345 arm_function_value (const_tree type, const_tree func,
3346 bool outgoing ATTRIBUTE_UNUSED)
3348 enum machine_mode mode;
3349 int unsignedp ATTRIBUTE_UNUSED;
3350 rtx r ATTRIBUTE_UNUSED;
3352 mode = TYPE_MODE (type);
3354 if (TARGET_AAPCS_BASED)
3355 return aapcs_allocate_return_reg (mode, type, func);
3357 /* Promote integer types. */
3358 if (INTEGRAL_TYPE_P (type))
3359 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3361 /* Promote small structs returned in a register to full-word size
3362 for big-endian AAPCS. */
3363 if (arm_return_in_msb (type))
3365 HOST_WIDE_INT size = int_size_in_bytes (type);
3366 if (size % UNITS_PER_WORD != 0)
3368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3373 return LIBCALL_VALUE (mode);
3377 libcall_eq (const void *p1, const void *p2)
3379 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3383 libcall_hash (const void *p1)
3385 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3389 add_libcall (htab_t htab, rtx libcall)
3391 *htab_find_slot (htab, libcall, INSERT) = libcall;
3395 arm_libcall_uses_aapcs_base (const_rtx libcall)
3397 static bool init_done = false;
3398 static htab_t libcall_htab;
3404 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3406 add_libcall (libcall_htab,
3407 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3408 add_libcall (libcall_htab,
3409 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3410 add_libcall (libcall_htab,
3411 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3412 add_libcall (libcall_htab,
3413 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3415 add_libcall (libcall_htab,
3416 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3417 add_libcall (libcall_htab,
3418 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3419 add_libcall (libcall_htab,
3420 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3421 add_libcall (libcall_htab,
3422 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3424 add_libcall (libcall_htab,
3425 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3426 add_libcall (libcall_htab,
3427 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3428 add_libcall (libcall_htab,
3429 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3430 add_libcall (libcall_htab,
3431 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3432 add_libcall (libcall_htab,
3433 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3434 add_libcall (libcall_htab,
3435 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3438 return libcall && htab_find (libcall_htab, libcall) != NULL;
3442 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3444 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3445 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3447 /* The following libcalls return their result in integer registers,
3448 even though they return a floating point value. */
3449 if (arm_libcall_uses_aapcs_base (libcall))
3450 return gen_rtx_REG (mode, ARG_REGISTER(1));
3454 return LIBCALL_VALUE (mode);
3457 /* Determine the amount of memory needed to store the possible return
3458 registers of an untyped call. */
3460 arm_apply_result_size (void)
3466 if (TARGET_HARD_FLOAT_ABI)
3472 if (TARGET_MAVERICK)
3475 if (TARGET_IWMMXT_ABI)
3482 /* Decide whether TYPE should be returned in memory (true)
3483 or in a register (false). FNTYPE is the type of the function making
3486 arm_return_in_memory (const_tree type, const_tree fntype)
3490 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3492 if (TARGET_AAPCS_BASED)
3494 /* Simple, non-aggregate types (i.e. not including vectors and
3495 complex) are always returned in a register (or registers).
3496 We don't care about which register here, so we can short-cut
3497 some of the detail. */
3498 if (!AGGREGATE_TYPE_P (type)
3499 && TREE_CODE (type) != VECTOR_TYPE
3500 && TREE_CODE (type) != COMPLEX_TYPE)
3503 /* Any return value that is no larger than one word can be
3505 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3508 /* Check any available co-processors to see if they accept the
3509 type as a register candidate (VFP, for example, can return
3510 some aggregates in consecutive registers). These aren't
3511 available if the call is variadic. */
3512 if (aapcs_select_return_coproc (type, fntype) >= 0)
3515 /* Vector values should be returned using ARM registers, not
3516 memory (unless they're over 16 bytes, which will break since
3517 we only have four call-clobbered registers to play with). */
3518 if (TREE_CODE (type) == VECTOR_TYPE)
3519 return (size < 0 || size > (4 * UNITS_PER_WORD));
3521 /* The rest go in memory. */
3525 if (TREE_CODE (type) == VECTOR_TYPE)
3526 return (size < 0 || size > (4 * UNITS_PER_WORD));
3528 if (!AGGREGATE_TYPE_P (type)
3529 && (TREE_CODE (type) != VECTOR_TYPE))
3530 /* All simple types are returned in registers. */
3533 if (arm_abi != ARM_ABI_APCS)
3535 /* ATPCS and later return aggregate types in memory only if they are
3536 larger than a word (or are variable size). */
3537 return (size < 0 || size > UNITS_PER_WORD);
3540 /* For the arm-wince targets we choose to be compatible with Microsoft's
3541 ARM and Thumb compilers, which always return aggregates in memory. */
3543 /* All structures/unions bigger than one word are returned in memory.
3544 Also catch the case where int_size_in_bytes returns -1. In this case
3545 the aggregate is either huge or of variable size, and in either case
3546 we will want to return it via memory and not in a register. */
3547 if (size < 0 || size > UNITS_PER_WORD)
3550 if (TREE_CODE (type) == RECORD_TYPE)
3554 /* For a struct the APCS says that we only return in a register
3555 if the type is 'integer like' and every addressable element
3556 has an offset of zero. For practical purposes this means
3557 that the structure can have at most one non bit-field element
3558 and that this element must be the first one in the structure. */
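/* Illustratively, under these rules (and the size check above)

	struct ok  { short lo; unsigned hi : 16; };
	struct bad { float f; };

   the first is integer-like and comes back in r0, while the second is
   forced into memory by the FLOAT_TYPE_P test below.  */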
3560 /* Find the first field, ignoring non-FIELD_DECL things which will
3561 have been created by C++. */
3562 for (field = TYPE_FIELDS (type);
3563 field && TREE_CODE (field) != FIELD_DECL;
3564 field = DECL_CHAIN (field))
3568 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3570 /* Check that the first field is valid for returning in a register. */
3572 /* ... Floats are not allowed */
3573 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3576 /* ... Aggregates that are not themselves valid for returning in
3577 a register are not allowed. */
3578 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3581 /* Now check the remaining fields, if any. Only bitfields are allowed,
3582 since they are not addressable. */
3583 for (field = DECL_CHAIN (field);
3585 field = DECL_CHAIN (field))
3587 if (TREE_CODE (field) != FIELD_DECL)
3590 if (!DECL_BIT_FIELD_TYPE (field))
3597 if (TREE_CODE (type) == UNION_TYPE)
3601 /* Unions can be returned in registers if every element is
3602 integral, or can be returned in an integer register. */
3603 for (field = TYPE_FIELDS (type);
3605 field = DECL_CHAIN (field))
3607 if (TREE_CODE (field) != FIELD_DECL)
3610 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3613 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3619 #endif /* not ARM_WINCE */
3621 /* Return all other types in memory. */
3625 /* Indicate whether or not words of a double are in big-endian order. */
3628 arm_float_words_big_endian (void)
3630 if (TARGET_MAVERICK)
3633 /* For FPA, float words are always big-endian. For VFP, float words
3634 follow the memory system mode. */
3642 return (TARGET_BIG_END ? 1 : 0);
3647 const struct pcs_attribute_arg
3651 } pcs_attribute_args[] =
3653 {"aapcs", ARM_PCS_AAPCS},
3654 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3656 /* We could recognize these, but changes would be needed elsewhere
3657 * to implement them. */
3658 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3659 {"atpcs", ARM_PCS_ATPCS},
3660 {"apcs", ARM_PCS_APCS},
3662 {NULL, ARM_PCS_UNKNOWN}
3666 arm_pcs_from_attribute (tree attr)
3668 const struct pcs_attribute_arg *ptr;
3671 /* Get the value of the argument. */
3672 if (TREE_VALUE (attr) == NULL_TREE
3673 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3674 return ARM_PCS_UNKNOWN;
3676 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3678 /* Check it against the list of known arguments. */
3679 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3680 if (streq (arg, ptr->arg))
3683 /* An unrecognized PCS variant. */
3684 return ARM_PCS_UNKNOWN;
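/* The attribute parsed here is spelled, for example:

	double sind (double) __attribute__ ((pcs ("aapcs-vfp")));

   declaring that sind takes and returns its double in VFP registers
   regardless of the translation unit's default calling convention.  */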
3687 /* Get the PCS variant to use for this call. TYPE is the function's type
3688 specification; DECL is the specific declaration. DECL may be null if
3689 the call could be indirect or if this is a library call. */
3691 arm_get_pcs_model (const_tree type, const_tree decl)
3693 bool user_convention = false;
3694 enum arm_pcs user_pcs = arm_pcs_default;
3699 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3702 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3703 user_convention = true;
3706 if (TARGET_AAPCS_BASED)
3708 /* Detect varargs functions. These always use the base rules
3709 (no argument is ever a candidate for a co-processor
3711 bool base_rules = stdarg_p (type);
3713 if (user_convention)
3715 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3716 sorry ("non-AAPCS derived PCS variant");
3717 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3718 error ("variadic functions must use the base AAPCS variant");
3722 return ARM_PCS_AAPCS;
3723 else if (user_convention)
3725 else if (decl && flag_unit_at_a_time)
3727 /* Local functions never leak outside this compilation unit,
3728 so we are free to use whatever conventions are
3730 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3731 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3733 return ARM_PCS_AAPCS_LOCAL;
3736 else if (user_convention && user_pcs != arm_pcs_default)
3737 sorry ("PCS variant");
3739 /* For everything else we use the target's default. */
3740 return arm_pcs_default;
3745 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3746 const_tree fntype ATTRIBUTE_UNUSED,
3747 rtx libcall ATTRIBUTE_UNUSED,
3748 const_tree fndecl ATTRIBUTE_UNUSED)
3750 /* Record the unallocated VFP registers. */
3751 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3752 pcum->aapcs_vfp_reg_alloc = 0;
3755 /* Walk down the type tree of TYPE counting consecutive base elements.
3756 If *MODEP is VOIDmode, then set it to the first valid floating point
3757 type. If a non-floating point type is found, or if a floating point
3758 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3759 otherwise return the count in the sub-tree. */
3761 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3763 enum machine_mode mode;
3766 switch (TREE_CODE (type))
3769 mode = TYPE_MODE (type);
3770 if (mode != DFmode && mode != SFmode)
3773 if (*modep == VOIDmode)
3782 mode = TYPE_MODE (TREE_TYPE (type));
3783 if (mode != DFmode && mode != SFmode)
3786 if (*modep == VOIDmode)
3795 /* Use V2SImode and V4SImode as representatives of all 64-bit
3796 and 128-bit vector types, whether or not those modes are
3797 supported with the present options. */
3798 size = int_size_in_bytes (type);
3811 if (*modep == VOIDmode)
3814 /* Vector modes are considered to be opaque: two vectors are
3815 equivalent for the purposes of being homogeneous aggregates
3816 if they are the same size. */
3825 tree index = TYPE_DOMAIN (type);
3827 /* Can't handle incomplete types. */
3828 if (!COMPLETE_TYPE_P(type))
3831 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3834 || !TYPE_MAX_VALUE (index)
3835 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3836 || !TYPE_MIN_VALUE (index)
3837 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3841 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3842 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3844 /* There must be no padding. */
3845 if (!host_integerp (TYPE_SIZE (type), 1)
3846 || (tree_low_cst (TYPE_SIZE (type), 1)
3847 != count * GET_MODE_BITSIZE (*modep)))
3859 /* Can't handle incomplete types. */
3860 if (!COMPLETE_TYPE_P(type))
3863 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3865 if (TREE_CODE (field) != FIELD_DECL)
3868 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3874 /* There must be no padding. */
3875 if (!host_integerp (TYPE_SIZE (type), 1)
3876 || (tree_low_cst (TYPE_SIZE (type), 1)
3877 != count * GET_MODE_BITSIZE (*modep)))
3884 case QUAL_UNION_TYPE:
3886 /* These aren't very interesting except in a degenerate case. */
3891 /* Can't handle incomplete types. */
3892 if (!COMPLETE_TYPE_P(type))
3895 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3897 if (TREE_CODE (field) != FIELD_DECL)
3900 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3903 count = count > sub_count ? count : sub_count;
3906 /* There must be no padding. */
3907 if (!host_integerp (TYPE_SIZE (type), 1)
3908 || (tree_low_cst (TYPE_SIZE (type), 1)
3909 != count * GET_MODE_BITSIZE (*modep)))
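/* By way of example, the walk above classifies

	struct v3   { float x, y, z; };
	struct m2x2 { double m[2][2]; };

   as 3 SFmode and 4 DFmode elements respectively, both homogeneous
   aggregates, whereas a struct mixing float and double members (or one
   containing padding) yields -1.  */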
3922 /* Return true if PCS_VARIANT should use VFP registers. */
3924 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3926 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3928 static bool seen_thumb1_vfp = false;
3930 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3932 sorry ("Thumb-1 hard-float VFP ABI");
3933 /* sorry() is not immediately fatal, so only display this once. */
3934 seen_thumb1_vfp = true;
3940 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3943 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3944 && (TARGET_VFP_DOUBLE || !is_double));
3948 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3949 enum machine_mode mode, const_tree type,
3950 enum machine_mode *base_mode, int *count)
3952 enum machine_mode new_mode = VOIDmode;
3954 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3955 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3956 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3961 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3964 new_mode = (mode == DCmode ? DFmode : SFmode);
3966 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3968 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3970 if (ag_count > 0 && ag_count <= 4)
3979 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3982 *base_mode = new_mode;
3987 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3988 enum machine_mode mode, const_tree type)
3990 int count ATTRIBUTE_UNUSED;
3991 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3993 if (!use_vfp_abi (pcs_variant, false))
3995 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4000 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4003 if (!use_vfp_abi (pcum->pcs_variant, false))
4006 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4007 &pcum->aapcs_vfp_rmode,
4008 &pcum->aapcs_vfp_rcount);
4012 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4013 const_tree type ATTRIBUTE_UNUSED)
4015 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4016 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4019 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4020 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4022 pcum->aapcs_vfp_reg_alloc = mask << regno;
4023 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4026 int rcount = pcum->aapcs_vfp_rcount;
4028 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4032 /* Avoid using unsupported vector modes. */
4033 if (rmode == V2SImode)
4035 else if (rmode == V4SImode)
4042 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4043 for (i = 0; i < rcount; i++)
4045 rtx tmp = gen_rtx_REG (rmode,
4046 FIRST_VFP_REGNUM + regno + i * rshift);
4047 tmp = gen_rtx_EXPR_LIST
4049 GEN_INT (i * GET_MODE_SIZE (rmode)));
4050 XVECEXP (par, 0, i) = tmp;
4053 pcum->aapcs_reg = par;
4056 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
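/* To illustrate the mask arithmetic above: for a single double argument
   (aapcs_vfp_rmode == DFmode, rcount == 1) SHIFT is 2 and MASK is 0x3,
   so the scan tries s0/s1 (d0), then s2/s3 (d1), and so on.  Working
   from a free-register mask rather than a high-water mark is what
   implements the AAPCS back-filling of VFP argument registers.  */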
4063 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4064 enum machine_mode mode,
4065 const_tree type ATTRIBUTE_UNUSED)
4067 if (!use_vfp_abi (pcs_variant, false))
4070 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4073 enum machine_mode ag_mode;
4078 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4083 if (ag_mode == V2SImode)
4085 else if (ag_mode == V4SImode)
4091 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4092 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4093 for (i = 0; i < count; i++)
4095 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4096 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4097 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4098 XVECEXP (par, 0, i) = tmp;
4104 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4108 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4109 enum machine_mode mode ATTRIBUTE_UNUSED,
4110 const_tree type ATTRIBUTE_UNUSED)
4112 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4113 pcum->aapcs_vfp_reg_alloc = 0;
4117 #define AAPCS_CP(X) \
4119 aapcs_ ## X ## _cum_init, \
4120 aapcs_ ## X ## _is_call_candidate, \
4121 aapcs_ ## X ## _allocate, \
4122 aapcs_ ## X ## _is_return_candidate, \
4123 aapcs_ ## X ## _allocate_return_reg, \
4124 aapcs_ ## X ## _advance \
4127 /* Table of co-processors that can be used to pass arguments in
4128 registers. Ideally no argument should be a candidate for more than
4129 one co-processor table entry, but the table is processed in order
4130 and stops after the first match. If that entry then fails to put
4131 the argument into a co-processor register, the argument will go on
4135 /* Initialize co-processor related state in the CUMULATIVE_ARGS structure. */
4136 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4138 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4139 BLKmode) is a candidate for this co-processor's registers; this
4140 function should ignore any position-dependent state in
4141 CUMULATIVE_ARGS and only use call-type dependent information. */
4142 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4144 /* Return true if the argument does get a co-processor register; it
4145 should set aapcs_reg to an RTX of the register allocated as is
4146 required for a return from FUNCTION_ARG. */
4147 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4149 /* Return true if a result of mode MODE (or type TYPE if MODE is
4150 BLKmode) can be returned in this co-processor's registers. */
4151 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4153 /* Allocate and return an RTX element to hold the return type of a
4154 call; this routine must not fail and will only be called if
4155 is_return_candidate returned true with the same parameters. */
4156 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4158 /* Finish processing this argument and prepare to start processing
4160 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4161 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4169 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4174 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4175 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4182 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4184 /* We aren't passed a decl, so we can't check that a call is local.
4185 However, it isn't clear that that would be a win anyway, since it
4186 might limit some tail-calling opportunities. */
4187 enum arm_pcs pcs_variant;
4191 const_tree fndecl = NULL_TREE;
4193 if (TREE_CODE (fntype) == FUNCTION_DECL)
4196 fntype = TREE_TYPE (fntype);
4199 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4202 pcs_variant = arm_pcs_default;
4204 if (pcs_variant != ARM_PCS_AAPCS)
4208 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4209 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4218 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4221 /* We aren't passed a decl, so we can't check that a call is local.
4222 However, it isn't clear that that would be a win anyway, since it
4223 might limit some tail-calling opportunities. */
4224 enum arm_pcs pcs_variant;
4225 int unsignedp ATTRIBUTE_UNUSED;
4229 const_tree fndecl = NULL_TREE;
4231 if (TREE_CODE (fntype) == FUNCTION_DECL)
4234 fntype = TREE_TYPE (fntype);
4237 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4240 pcs_variant = arm_pcs_default;
4242 /* Promote integer types. */
4243 if (type && INTEGRAL_TYPE_P (type))
4244 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4246 if (pcs_variant != ARM_PCS_AAPCS)
4250 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4251 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4253 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4257 /* Promotes small structs returned in a register to full-word size
4258 for big-endian AAPCS. */
4259 if (type && arm_return_in_msb (type))
4261 HOST_WIDE_INT size = int_size_in_bytes (type);
4262 if (size % UNITS_PER_WORD != 0)
4264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
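/* E.g. a 6-byte structure is padded to 8 bytes here and comes back in
   DImode (the r0/r1 pair); big-endian AAPCS wants the value in the most
   significant bytes, with the padding at the least significant end.  */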
4269 return gen_rtx_REG (mode, R0_REGNUM);
4273 aapcs_libcall_value (enum machine_mode mode)
4275 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4278 /* Lay out a function argument using the AAPCS rules. The rule
4279 numbers referred to here are those in the AAPCS. */
4281 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4282 const_tree type, bool named)
4287 /* We only need to do this once per argument. */
4288 if (pcum->aapcs_arg_processed)
4291 pcum->aapcs_arg_processed = true;
4293 /* Special case: if named is false then we are handling an incoming
4294 anonymous argument which is on the stack. */
4298 /* Is this a potential co-processor register candidate? */
4299 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4301 int slot = aapcs_select_call_coproc (pcum, mode, type);
4302 pcum->aapcs_cprc_slot = slot;
4304 /* We don't have to apply any of the rules from part B of the
4305 preparation phase; these are handled elsewhere in the compiler. */
4310 /* A co-processor register candidate goes either in its own
4311 class of registers or on the stack. */
4312 if (!pcum->aapcs_cprc_failed[slot])
4314 /* C1.cp - Try to allocate the argument to co-processor registers. */
4316 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4319 /* C2.cp - Put the argument on the stack and note that we
4320 can't assign any more candidates in this slot. We also
4321 need to note that we have allocated stack space, so that
4322 we won't later try to split a non-cprc candidate between
4323 core registers and the stack. */
4324 pcum->aapcs_cprc_failed[slot] = true;
4325 pcum->can_split = false;
4328 /* We didn't get a register, so this argument goes on the stack. */
4330 gcc_assert (pcum->can_split == false);
4335 /* C3 - For double-word aligned arguments, round the NCRN up to the
4336 next even number. */
4337 ncrn = pcum->aapcs_ncrn;
4338 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4341 nregs = ARM_NUM_REGS2 (mode, type);
4343 /* Sigh, this test should really assert that nregs > 0, but a GCC
4344 extension allows empty structs and then gives them empty size; it
4345 then allows such a structure to be passed by value. For some of
4346 the code below we have to pretend that such an argument has
4347 non-zero size so that we 'locate' it correctly either in
4348 registers or on the stack. */
4349 gcc_assert (nregs >= 0);
4351 nregs2 = nregs ? nregs : 1;
4353 /* C4 - Argument fits entirely in core registers. */
4354 if (ncrn + nregs2 <= NUM_ARG_REGS)
4356 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4357 pcum->aapcs_next_ncrn = ncrn + nregs;
4361 /* C5 - Some core registers left and there are no arguments already
4362 on the stack: split this argument between the remaining core
4363 registers and the stack. */
4364 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4366 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4367 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4368 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4372 /* C6 - NCRN is set to 4. */
4373 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4375 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
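/* An illustrative walk through the rules above, assuming the AAPCS base
   variant with NUM_ARG_REGS == 4:

     void f (int a, int b, int c, double d);

   a, b and c land in r0, r1 and r2 via rule C4.  For d, ncrn == 3; C3
   rounds it up to 4 because DFmode needs doubleword alignment, C4 and
   C5 then fail because no core registers remain, and d goes on the
   stack (C6-C8).  Had d been the second argument instead, C3 would
   round ncrn from 1 up to 2 and C4 would allocate the r2/r3 pair.  */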
4379 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4380 for a call to a function whose data type is FNTYPE.
4381 For a library call, FNTYPE is NULL. */
4383 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4385 tree fndecl ATTRIBUTE_UNUSED)
4387 /* Long call handling. */
4389 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4391 pcum->pcs_variant = arm_pcs_default;
4393 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4395 if (arm_libcall_uses_aapcs_base (libname))
4396 pcum->pcs_variant = ARM_PCS_AAPCS;
4398 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4399 pcum->aapcs_reg = NULL_RTX;
4400 pcum->aapcs_partial = 0;
4401 pcum->aapcs_arg_processed = false;
4402 pcum->aapcs_cprc_slot = -1;
4403 pcum->can_split = true;
4405 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4409 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4411 pcum->aapcs_cprc_failed[i] = false;
4412 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4420 /* On the ARM, the offset starts at 0. */
4422 pcum->iwmmxt_nregs = 0;
4423 pcum->can_split = true;
4425 /* Varargs vectors are treated the same as long long.
4426 named_count avoids having to change the way arm handles 'named' */
4427 pcum->named_count = 0;
4430 if (TARGET_REALLY_IWMMXT && fntype)
4434 for (fn_arg = TYPE_ARG_TYPES (fntype);
4436 fn_arg = TREE_CHAIN (fn_arg))
4437 pcum->named_count += 1;
4439 if (! pcum->named_count)
4440 pcum->named_count = INT_MAX;
4445 /* Return true if mode/type need doubleword alignment. */
4447 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4449 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4450 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
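/* On AAPCS targets, for instance, DImode and DFmode have 64-bit
   alignment and so exceed the 32-bit PARM_BOUNDARY, as does any type
   declared with __attribute__((aligned (8))).  */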
4454 /* Determine where to put an argument to a function.
4455 Value is zero to push the argument on the stack,
4456 or a hard register in which to store the argument.
4458 MODE is the argument's machine mode.
4459 TYPE is the data type of the argument (as a tree).
4460 This is null for libcalls where that information may not be available.
4462 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4463 the preceding args and about the function being called.
4464 NAMED is nonzero if this argument is a named parameter
4465 (otherwise it is an extra parameter matching an ellipsis).
4467 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4468 other arguments are passed on the stack. If (NAMED == 0) (which happens
4469 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4470 defined), say it is passed on the stack (function_prologue will
4471 indeed make it pass on the stack if necessary). */
4474 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4475 const_tree type, bool named)
4479 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4480 a call insn (op3 of a call_value insn). */
4481 if (mode == VOIDmode)
4484 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4486 aapcs_layout_arg (pcum, mode, type, named);
4487 return pcum->aapcs_reg;
4490 /* Varargs vectors are treated the same as long long.
4491 named_count avoids having to change the way arm handles 'named' */
4492 if (TARGET_IWMMXT_ABI
4493 && arm_vector_mode_supported_p (mode)
4494 && pcum->named_count > pcum->nargs + 1)
4496 if (pcum->iwmmxt_nregs <= 9)
4497 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4500 pcum->can_split = false;
4505 /* Put doubleword aligned quantities in even register pairs. */
4507 && ARM_DOUBLEWORD_ALIGN
4508 && arm_needs_doubleword_align (mode, type))
4511 /* Only allow splitting an arg between regs and memory if all preceding
4512 args were allocated to regs. For args passed by reference we only count
4513 the reference pointer. */
4514 if (pcum->can_split)
4517 nregs = ARM_NUM_REGS2 (mode, type);
4519 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4522 return gen_rtx_REG (mode, pcum->nregs);
4526 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4528 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4529 ? DOUBLEWORD_ALIGNMENT
4534 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4535 tree type, bool named)
4537 int nregs = pcum->nregs;
4539 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4541 aapcs_layout_arg (pcum, mode, type, named);
4542 return pcum->aapcs_partial;
4545 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4548 if (NUM_ARG_REGS > nregs
4549 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4551 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
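/* E.g. under the legacy ABI, f (int, int, int, long long) reaches here
   for the long long with nregs == 3: one word (4 bytes) travels in r3
   and the remaining word of the argument spills to the stack.  */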
4556 /* Update the data in PCUM to advance over an argument
4557 of mode MODE and data type TYPE.
4558 (TYPE is null for libcalls where that information may not be available.) */
4561 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4562 const_tree type, bool named)
4564 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4566 aapcs_layout_arg (pcum, mode, type, named);
4568 if (pcum->aapcs_cprc_slot >= 0)
4570 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4572 pcum->aapcs_cprc_slot = -1;
4575 /* Generic stuff. */
4576 pcum->aapcs_arg_processed = false;
4577 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4578 pcum->aapcs_reg = NULL_RTX;
4579 pcum->aapcs_partial = 0;
4584 if (arm_vector_mode_supported_p (mode)
4585 && pcum->named_count > pcum->nargs
4586 && TARGET_IWMMXT_ABI)
4587 pcum->iwmmxt_nregs += 1;
4589 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4593 /* Variable sized types are passed by reference. This is a GCC
4594 extension to the ARM ABI. */
4597 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4598 enum machine_mode mode ATTRIBUTE_UNUSED,
4599 const_tree type, bool named ATTRIBUTE_UNUSED)
4601 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
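/* A GNU C variable-length array type, for instance, has a TYPE_SIZE
   that is not an INTEGER_CST, so such arguments take this path.  */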
4604 /* Encode the current state of the #pragma [no_]long_calls. */
4607 OFF, /* No #pragma [no_]long_calls is in effect. */
4608 LONG, /* #pragma long_calls is in effect. */
4609 SHORT /* #pragma no_long_calls is in effect. */
4612 static arm_pragma_enum arm_pragma_long_calls = OFF;
4615 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4617 arm_pragma_long_calls = LONG;
4621 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4623 arm_pragma_long_calls = SHORT;
4627 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4629 arm_pragma_long_calls = OFF;
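/* Illustrative use of the three pragmas handled above (spellings as
   registered for the c-family front ends):

     #pragma long_calls
     void far_func (void);     -- type gets the long_call attribute
     #pragma no_long_calls
     void near_func (void);    -- type gets the short_call attribute
     #pragma long_calls_off
     void plain_func (void);   -- no call-type attribute is added

   The attributes themselves are attached by
   arm_set_default_type_attributes below.  */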
4632 /* Handle an attribute requiring a FUNCTION_DECL;
4633 arguments as in struct attribute_spec.handler. */
4635 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4636 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4638 if (TREE_CODE (*node) != FUNCTION_DECL)
4640 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4642 *no_add_attrs = true;
4648 /* Handle an "interrupt" or "isr" attribute;
4649 arguments as in struct attribute_spec.handler. */
4651 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4656 if (TREE_CODE (*node) != FUNCTION_DECL)
4658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4660 *no_add_attrs = true;
4662 /* FIXME: the argument, if any, is checked for type attributes;
4663 should it be checked for decl ones? */
4667 if (TREE_CODE (*node) == FUNCTION_TYPE
4668 || TREE_CODE (*node) == METHOD_TYPE)
4670 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4672 warning (OPT_Wattributes, "%qE attribute ignored",
4674 *no_add_attrs = true;
4677 else if (TREE_CODE (*node) == POINTER_TYPE
4678 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4679 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4680 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4682 *node = build_variant_type_copy (*node);
4683 TREE_TYPE (*node) = build_type_attribute_variant
4685 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4686 *no_add_attrs = true;
4690 /* Possibly pass this attribute on from the type to a decl. */
4691 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4692 | (int) ATTR_FLAG_FUNCTION_NEXT
4693 | (int) ATTR_FLAG_ARRAY_NEXT))
4695 *no_add_attrs = true;
4696 return tree_cons (name, args, NULL_TREE);
4700 warning (OPT_Wattributes, "%qE attribute ignored",
4709 /* Handle a "pcs" attribute; arguments as in struct
4710 attribute_spec.handler. */
4712 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4713 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4715 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4717 warning (OPT_Wattributes, "%qE attribute ignored", name);
4718 *no_add_attrs = true;
4723 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4724 /* Handle the "notshared" attribute. This attribute is another way of
4725 requesting hidden visibility. ARM's compiler supports
4726 "__declspec(notshared)"; we support the same thing via an
4730 arm_handle_notshared_attribute (tree *node,
4731 tree name ATTRIBUTE_UNUSED,
4732 tree args ATTRIBUTE_UNUSED,
4733 int flags ATTRIBUTE_UNUSED,
4736 tree decl = TYPE_NAME (*node);
4740 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4741 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4742 *no_add_attrs = false;
4748 /* Return 0 if the attributes for two types are incompatible, 1 if they
4749 are compatible, and 2 if they are nearly compatible (which causes a
4750 warning to be generated). */
4752 arm_comp_type_attributes (const_tree type1, const_tree type2)
4756 /* Check for mismatch of non-default calling convention. */
4757 if (TREE_CODE (type1) != FUNCTION_TYPE)
4760 /* Check for mismatched call attributes. */
4761 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4762 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4763 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4764 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4766 /* Only bother to check if an attribute is defined. */
4767 if (l1 | l2 | s1 | s2)
4769 /* If one type has an attribute, the other must have the same attribute. */
4770 if ((l1 != l2) || (s1 != s2))
4773 /* Disallow mixed attributes. */
4774 if ((l1 & s2) || (l2 & s1))
4778 /* Check for mismatched ISR attribute. */
4779 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4781 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4782 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4784 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4791 /* Assigns default attributes to a newly defined type. This is used to
4792 set short_call/long_call attributes for function types of
4793 functions defined inside corresponding #pragma scopes. */
4795 arm_set_default_type_attributes (tree type)
4797 /* Add __attribute__ ((long_call)) to all functions when inside
4798 #pragma long_calls, or __attribute__ ((short_call)) when inside
4799 #pragma no_long_calls. */
4800 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4802 tree type_attr_list, attr_name;
4803 type_attr_list = TYPE_ATTRIBUTES (type);
4805 if (arm_pragma_long_calls == LONG)
4806 attr_name = get_identifier ("long_call");
4807 else if (arm_pragma_long_calls == SHORT)
4808 attr_name = get_identifier ("short_call");
4812 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4813 TYPE_ATTRIBUTES (type) = type_attr_list;
4817 /* Return true if DECL is known to be linked into section SECTION. */
4820 arm_function_in_section_p (tree decl, section *section)
4822 /* We can only be certain about functions defined in the same
4823 compilation unit. */
4824 if (!TREE_STATIC (decl))
4827 /* Make sure that SYMBOL always binds to the definition in this
4828 compilation unit. */
4829 if (!targetm.binds_local_p (decl))
4832 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4833 if (!DECL_SECTION_NAME (decl))
4835 /* Make sure that we will not create a unique section for DECL. */
4836 if (flag_function_sections || DECL_ONE_ONLY (decl))
4840 return function_section (decl) == section;
4843 /* Return nonzero if a 32-bit "long_call" should be generated for
4844 a call from the current function to DECL. We generate a long_call if the function:
4847 a. has an __attribute__ ((long_call))
4848 or b. is within the scope of a #pragma long_calls
4849 or c. the -mlong-calls command line switch has been specified
4851 However we do not generate a long call if the function:
4853 d. has an __attribute__ ((short_call))
4854 or e. is inside the scope of a #pragma no_long_calls
4855 or f. is defined in the same section as the current function. */
4858 arm_is_long_call_p (tree decl)
4863 return TARGET_LONG_CALLS;
4865 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4866 if (lookup_attribute ("short_call", attrs))
4869 /* For "f", be conservative, and only cater for cases in which the
4870 whole of the current function is placed in the same section. */
4871 if (!flag_reorder_blocks_and_partition
4872 && TREE_CODE (decl) == FUNCTION_DECL
4873 && arm_function_in_section_p (decl, current_function_section ()))
4876 if (lookup_attribute ("long_call", attrs))
4879 return TARGET_LONG_CALLS;
4882 /* Return nonzero if it is ok to make a tail-call to DECL. */
4884 arm_function_ok_for_sibcall (tree decl, tree exp)
4886 unsigned long func_type;
4888 if (cfun->machine->sibcall_blocked)
4891 /* Never tailcall something for which we have no decl, or if we
4892 are generating code for Thumb-1. */
4893 if (decl == NULL || TARGET_THUMB1)
4896 /* The PIC register is live on entry to VxWorks PLT entries, so we
4897 must make the call before restoring the PIC register. */
4898 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4901 /* Cannot tail-call to long calls, since these are out of range of
4902 a branch instruction. */
4903 if (arm_is_long_call_p (decl))
4906 /* If we are interworking and the function is not declared static
4907 then we can't tail-call it unless we know that it exists in this
4908 compilation unit (since it might be a Thumb routine). */
4909 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4912 func_type = arm_current_func_type ();
4913 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4914 if (IS_INTERRUPT (func_type))
4917 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4919 /* Check that the return value locations are the same. For
4920 example, that we aren't returning a value from the sibling in
4921 a VFP register but then need to transfer it to a core register. */
4925 a = arm_function_value (TREE_TYPE (exp), decl, false);
4926 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4928 if (!rtx_equal_p (a, b))
4932 /* Never tailcall if function may be called with a misaligned SP. */
4933 if (IS_STACKALIGN (func_type))
4936 /* Everything else is ok. */
4941 /* Addressing mode support functions. */
4943 /* Return nonzero if X is a legitimate immediate operand when compiling
4944 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4946 legitimate_pic_operand_p (rtx x)
4948 if (GET_CODE (x) == SYMBOL_REF
4949 || (GET_CODE (x) == CONST
4950 && GET_CODE (XEXP (x, 0)) == PLUS
4951 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4957 /* Record that the current function needs a PIC register. Initialize
4958 cfun->machine->pic_reg if we have not already done so. */
4961 require_pic_register (void)
4963 /* A lot of the logic here is made obscure by the fact that this
4964 routine gets called as part of the rtx cost estimation process.
4965 We don't want those calls to affect any assumptions about the real
4966 function; and further, we can't call entry_of_function() until we
4967 start the real expansion process. */
4968 if (!crtl->uses_pic_offset_table)
4970 gcc_assert (can_create_pseudo_p ());
4971 if (arm_pic_register != INVALID_REGNUM)
4973 if (!cfun->machine->pic_reg)
4974 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4976 /* Play games to avoid marking the function as needing pic
4977 if we are being called as part of the cost-estimation process. */
4979 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4980 crtl->uses_pic_offset_table = 1;
4986 if (!cfun->machine->pic_reg)
4987 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4989 /* Play games to avoid marking the function as needing pic
4990 if we are being called as part of the cost-estimation process. */
4992 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4994 crtl->uses_pic_offset_table = 1;
4997 arm_load_pic_register (0UL);
5002 for (insn = seq; insn; insn = NEXT_INSN (insn))
5004 INSN_LOCATOR (insn) = prologue_locator;
5006 /* We can be called during expansion of PHI nodes, where
5007 we can't yet emit instructions directly in the final
5008 insn stream. Queue the insns on the entry edge, they will
5009 be committed after everything else is expanded. */
5010 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5017 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5019 if (GET_CODE (orig) == SYMBOL_REF
5020 || GET_CODE (orig) == LABEL_REF)
5026 gcc_assert (can_create_pseudo_p ());
5027 reg = gen_reg_rtx (Pmode);
5030 /* VxWorks does not impose a fixed gap between segments; the run-time
5031 gap can be different from the object-file gap. We therefore can't
5032 use GOTOFF unless we are absolutely sure that the symbol is in the
5033 same segment as the GOT. Unfortunately, the flexibility of linker
5034 scripts means that we can't be sure of that in general, so assume
5035 that GOTOFF is never valid on VxWorks. */
5036 if ((GET_CODE (orig) == LABEL_REF
5037 || (GET_CODE (orig) == SYMBOL_REF &&
5038 SYMBOL_REF_LOCAL_P (orig)))
5040 && !TARGET_VXWORKS_RTP)
5041 insn = arm_pic_static_addr (orig, reg);
5047 /* If this function doesn't have a pic register, create one now. */
5048 require_pic_register ();
5050 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5052 /* Make the MEM as close to a constant as possible. */
5053 mem = SET_SRC (pat);
5054 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5055 MEM_READONLY_P (mem) = 1;
5056 MEM_NOTRAP_P (mem) = 1;
5058 insn = emit_insn (pat);
5061 /* Put a REG_EQUAL note on this insn, so that it can be optimized by loop. */
5063 set_unique_reg_note (insn, REG_EQUAL, orig);
5067 else if (GET_CODE (orig) == CONST)
5071 if (GET_CODE (XEXP (orig, 0)) == PLUS
5072 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5075 /* Handle the case where we have: const (UNSPEC_TLS). */
5076 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5077 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5080 /* Handle the case where we have:
5081 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5083 if (GET_CODE (XEXP (orig, 0)) == PLUS
5084 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5085 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5087 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5093 gcc_assert (can_create_pseudo_p ());
5094 reg = gen_reg_rtx (Pmode);
5097 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5099 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5100 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5101 base == reg ? 0 : reg);
5103 if (GET_CODE (offset) == CONST_INT)
5105 /* The base register doesn't really matter, we only want to
5106 test the index for the appropriate mode. */
5107 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5109 gcc_assert (can_create_pseudo_p ());
5110 offset = force_reg (Pmode, offset);
5113 if (GET_CODE (offset) == CONST_INT)
5114 return plus_constant (base, INTVAL (offset));
5117 if (GET_MODE_SIZE (mode) > 4
5118 && (GET_MODE_CLASS (mode) == MODE_INT
5119 || TARGET_SOFT_FLOAT))
5121 emit_insn (gen_addsi3 (reg, base, offset));
5125 return gen_rtx_PLUS (Pmode, base, offset);
5132 /* Find a spare register to use during the prolog of a function. */
5135 thumb_find_work_register (unsigned long pushed_regs_mask)
5139 /* Check the argument registers first as these are call-used. The
5140 register allocation order means that sometimes r3 might be used
5141 but earlier argument registers might not, so check them all. */
5142 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5143 if (!df_regs_ever_live_p (reg))
5146 /* Before going on to check the call-saved registers we can try a couple
5147 more ways of deducing that r3 is available. The first is when we are
5148 pushing anonymous arguments onto the stack and we have less than 4
5149 registers worth of fixed arguments(*). In this case r3 will be part of
5150 the variable argument list and so we can be sure that it will be
5151 pushed right at the start of the function. Hence it will be available
5152 for the rest of the prologue.
5153 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5154 if (cfun->machine->uses_anonymous_args
5155 && crtl->args.pretend_args_size > 0)
5156 return LAST_ARG_REGNUM;
5158 /* The other case is when we have fixed arguments but less than 4 registers
5159 worth. In this case r3 might be used in the body of the function, but
5160 it is not being used to convey an argument into the function. In theory
5161 we could just check crtl->args.size to see how many bytes are
5162 being passed in argument registers, but it seems that it is unreliable.
5163 Sometimes it will have the value 0 when in fact arguments are being
5164 passed. (See testcase execute/20021111-1.c for an example). So we also
5165 check the args_info.nregs field as well. The problem with this field is
5166 that it makes no allowances for arguments that are passed to the
5167 function but which are not used. Hence we could miss an opportunity
5168 when a function has an unused argument in r3. But it is better to be
5169 safe than to be sorry. */
5170 if (! cfun->machine->uses_anonymous_args
5171 && crtl->args.size >= 0
5172 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5173 && crtl->args.info.nregs < 4)
5174 return LAST_ARG_REGNUM;
5176 /* Otherwise look for a call-saved register that is going to be pushed. */
5177 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5178 if (pushed_regs_mask & (1 << reg))
5183 /* Thumb-2 can use high regs. */
5184 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5185 if (pushed_regs_mask & (1 << reg))
5188 /* Something went wrong - thumb_compute_save_reg_mask()
5189 should have arranged for a suitable register to be pushed. */
5193 static GTY(()) int pic_labelno;
5195 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5199 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5201 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5203 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5206 gcc_assert (flag_pic);
5208 pic_reg = cfun->machine->pic_reg;
5209 if (TARGET_VXWORKS_RTP)
5211 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5212 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5213 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5215 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5217 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5218 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5222 /* We use an UNSPEC rather than a LABEL_REF because this label
5223 never appears in the code stream. */
5225 labelno = GEN_INT (pic_labelno++);
5226 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5227 l1 = gen_rtx_CONST (VOIDmode, l1);
5229 /* On the ARM the PC register contains 'dot + 8' at the time of the
5230 addition, on the Thumb it is 'dot + 4'. */
5231 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5232 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5234 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5238 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5240 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5242 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5244 else /* TARGET_THUMB1 */
5246 if (arm_pic_register != INVALID_REGNUM
5247 && REGNO (pic_reg) > LAST_LO_REGNUM)
5249 /* We will have pushed the pic register, so we should always be
5250 able to find a work register. */
5251 pic_tmp = gen_rtx_REG (SImode,
5252 thumb_find_work_register (saved_regs));
5253 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5254 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5257 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5258 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5262 /* Need to emit this whether or not we obey regdecls,
5263 since setjmp/longjmp can cause life info to screw up. */
5267 /* Generate code to load the address of a static var when flag_pic is set. */
5269 arm_pic_static_addr (rtx orig, rtx reg)
5271 rtx l1, labelno, offset_rtx, insn;
5273 gcc_assert (flag_pic);
5275 /* We use an UNSPEC rather than a LABEL_REF because this label
5276 never appears in the code stream. */
5277 labelno = GEN_INT (pic_labelno++);
5278 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5279 l1 = gen_rtx_CONST (VOIDmode, l1);
5281 /* On the ARM the PC register contains 'dot + 8' at the time of the
5282 addition, on the Thumb it is 'dot + 4'. */
5283 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5284 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5285 UNSPEC_SYMBOL_OFFSET);
5286 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5290 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5292 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5294 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5296 else /* TARGET_THUMB1 */
5298 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5299 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5305 /* Return nonzero if X is valid as an ARM state addressing register. */
5307 arm_address_register_rtx_p (rtx x, int strict_p)
5311 if (GET_CODE (x) != REG)
5317 return ARM_REGNO_OK_FOR_BASE_P (regno);
5319 return (regno <= LAST_ARM_REGNUM
5320 || regno >= FIRST_PSEUDO_REGISTER
5321 || regno == FRAME_POINTER_REGNUM
5322 || regno == ARG_POINTER_REGNUM);
5325 /* Return TRUE if this rtx is the difference of a symbol and a label,
5326 and will reduce to a PC-relative relocation in the object file.
5327 Expressions like this can be left alone when generating PIC, rather
5328 than forced through the GOT. */
5330 pcrel_constant_p (rtx x)
5332 if (GET_CODE (x) == MINUS)
5333 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
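/* For example, (minus (symbol_ref "foo") (label_ref L1)) assembles to
   the link-time constant "foo - .L1" and therefore needs no GOT entry
   when generating PIC.  */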
5338 /* Return true if X will surely end up in an index register after next splitting pass. */
5341 will_be_in_index_register (const_rtx x)
5343 /* arm.md: calculate_pic_address will split this into a register. */
5344 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5347 /* Return nonzero if X is a valid ARM state address operand. */
5349 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5353 enum rtx_code code = GET_CODE (x);
5355 if (arm_address_register_rtx_p (x, strict_p))
5358 use_ldrd = (TARGET_LDRD
5360 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5362 if (code == POST_INC || code == PRE_DEC
5363 || ((code == PRE_INC || code == POST_DEC)
5364 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5365 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5367 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5368 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5369 && GET_CODE (XEXP (x, 1)) == PLUS
5370 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5372 rtx addend = XEXP (XEXP (x, 1), 1);
5374 /* Don't allow ldrd post-increment by register because it's hard
5375 to fix up invalid register choices. */
5377 && GET_CODE (x) == POST_MODIFY
5378 && GET_CODE (addend) == REG)
5381 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5382 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5385 /* After reload constants split into minipools will have addresses
5386 from a LABEL_REF. */
5387 else if (reload_completed
5388 && (code == LABEL_REF
5390 && GET_CODE (XEXP (x, 0)) == PLUS
5391 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5392 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5395 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5398 else if (code == PLUS)
5400 rtx xop0 = XEXP (x, 0);
5401 rtx xop1 = XEXP (x, 1);
5403 return ((arm_address_register_rtx_p (xop0, strict_p)
5404 && ((GET_CODE (xop1) == CONST_INT
5405 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5406 || (!strict_p && will_be_in_index_register (xop1))))
5407 || (arm_address_register_rtx_p (xop1, strict_p)
5408 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5412 /* Reload currently can't handle MINUS, so disable this for now */
5413 else if (GET_CODE (x) == MINUS)
5415 rtx xop0 = XEXP (x, 0);
5416 rtx xop1 = XEXP (x, 1);
5418 return (arm_address_register_rtx_p (xop0, strict_p)
5419 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5423 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5424 && code == SYMBOL_REF
5425 && CONSTANT_POOL_ADDRESS_P (x)
5427 && symbol_mentioned_p (get_pool_constant (x))
5428 && ! pcrel_constant_p (get_pool_constant (x))))
5434 /* Return nonzero if X is a valid Thumb-2 address operand. */
5436 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5439 enum rtx_code code = GET_CODE (x);
5441 if (arm_address_register_rtx_p (x, strict_p))
5444 use_ldrd = (TARGET_LDRD
5446 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5448 if (code == POST_INC || code == PRE_DEC
5449 || ((code == PRE_INC || code == POST_DEC)
5450 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5451 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5453 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5454 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5455 && GET_CODE (XEXP (x, 1)) == PLUS
5456 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5458 /* Thumb-2 only has autoincrement by constant. */
5459 rtx addend = XEXP (XEXP (x, 1), 1);
5460 HOST_WIDE_INT offset;
5462 if (GET_CODE (addend) != CONST_INT)
5465 offset = INTVAL (addend);
5466 if (GET_MODE_SIZE (mode) <= 4)
5467 return (offset > -256 && offset < 256);
5469 return (use_ldrd && offset > -1024 && offset < 1024
5470 && (offset & 3) == 0);
5473 /* After reload constants split into minipools will have addresses
5474 from a LABEL_REF. */
5475 else if (reload_completed
5476 && (code == LABEL_REF
5478 && GET_CODE (XEXP (x, 0)) == PLUS
5479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5483 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5486 else if (code == PLUS)
5488 rtx xop0 = XEXP (x, 0);
5489 rtx xop1 = XEXP (x, 1);
5491 return ((arm_address_register_rtx_p (xop0, strict_p)
5492 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5493 || (!strict_p && will_be_in_index_register (xop1))))
5494 || (arm_address_register_rtx_p (xop1, strict_p)
5495 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5498 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5499 && code == SYMBOL_REF
5500 && CONSTANT_POOL_ADDRESS_P (x)
5502 && symbol_mentioned_p (get_pool_constant (x))
5503 && ! pcrel_constant_p (get_pool_constant (x))))
5509 /* Return nonzero if INDEX is valid for an address index operand in
5512 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5515 HOST_WIDE_INT range;
5516 enum rtx_code code = GET_CODE (index);
5518 /* Standard coprocessor addressing modes. */
5519 if (TARGET_HARD_FLOAT
5520 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5521 && (mode == SFmode || mode == DFmode
5522 || (TARGET_MAVERICK && mode == DImode)))
5523 return (code == CONST_INT && INTVAL (index) < 1024
5524 && INTVAL (index) > -1024
5525 && (INTVAL (index) & 3) == 0);
5527 /* For quad modes, we restrict the constant offset to be slightly less
5528 than what the instruction format permits. We do this because for
5529 quad mode moves, we will actually decompose them into two separate
5530 double-mode reads or writes. INDEX must therefore be a valid
5531 (double-mode) offset and so should INDEX+8. */
5532 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5533 return (code == CONST_INT
5534 && INTVAL (index) < 1016
5535 && INTVAL (index) > -1024
5536 && (INTVAL (index) & 3) == 0);
5538 /* We have no such constraint on double mode offsets, so we permit the
5539 full range of the instruction format. */
5540 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5541 return (code == CONST_INT
5542 && INTVAL (index) < 1024
5543 && INTVAL (index) > -1024
5544 && (INTVAL (index) & 3) == 0);
5546 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5547 return (code == CONST_INT
5548 && INTVAL (index) < 1024
5549 && INTVAL (index) > -1024
5550 && (INTVAL (index) & 3) == 0);
5552 if (arm_address_register_rtx_p (index, strict_p)
5553 && (GET_MODE_SIZE (mode) <= 4))
5556 if (mode == DImode || mode == DFmode)
5558 if (code == CONST_INT)
5560 HOST_WIDE_INT val = INTVAL (index);
5563 return val > -256 && val < 256;
5565 return val > -4096 && val < 4092;
5568 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5571 if (GET_MODE_SIZE (mode) <= 4
5575 || (mode == QImode && outer == SIGN_EXTEND))))
5579 rtx xiop0 = XEXP (index, 0);
5580 rtx xiop1 = XEXP (index, 1);
5582 return ((arm_address_register_rtx_p (xiop0, strict_p)
5583 && power_of_two_operand (xiop1, SImode))
5584 || (arm_address_register_rtx_p (xiop1, strict_p)
5585 && power_of_two_operand (xiop0, SImode)));
5587 else if (code == LSHIFTRT || code == ASHIFTRT
5588 || code == ASHIFT || code == ROTATERT)
5590 rtx op = XEXP (index, 1);
5592 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5593 && GET_CODE (op) == CONST_INT
5595 && INTVAL (op) <= 31);
5599 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5605 || (outer == SIGN_EXTEND && mode == QImode))
5611 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5613 return (code == CONST_INT
5614 && INTVAL (index) < range
5615 && INTVAL (index) > -range);
5618 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5619 index operand. i.e. 1, 2, 4 or 8. */
5621 thumb2_index_mul_operand (rtx op)
5625 if (GET_CODE (op) != CONST_INT)
5629 return (val == 1 || val == 2 || val == 4 || val == 8);
5632 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5634 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5636 enum rtx_code code = GET_CODE (index);
5638 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5639 /* Standard coprocessor addressing modes. */
5640 if (TARGET_HARD_FLOAT
5641 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5642 && (mode == SFmode || mode == DFmode
5643 || (TARGET_MAVERICK && mode == DImode)))
5644 return (code == CONST_INT && INTVAL (index) < 1024
5645 /* Thumb-2 allows only > -256 index range for its core register
5646 load/stores. Since we allow SF/DF in core registers, we have
5647 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5649 && INTVAL (index) > -256
5650 && (INTVAL (index) & 3) == 0);
5652 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5654 /* For DImode assume values will usually live in core regs
5655 and only allow LDRD addressing modes. */
5656 if (!TARGET_LDRD || mode != DImode)
5657 return (code == CONST_INT
5658 && INTVAL (index) < 1024
5659 && INTVAL (index) > -1024
5660 && (INTVAL (index) & 3) == 0);
5663 /* For quad modes, we restrict the constant offset to be slightly less
5664 than what the instruction format permits. We do this because for
5665 quad mode moves, we will actually decompose them into two separate
5666 double-mode reads or writes. INDEX must therefore be a valid
5667 (double-mode) offset and so should INDEX+8. */
5668 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5669 return (code == CONST_INT
5670 && INTVAL (index) < 1016
5671 && INTVAL (index) > -1024
5672 && (INTVAL (index) & 3) == 0);
5674 /* We have no such constraint on double mode offsets, so we permit the
5675 full range of the instruction format. */
5676 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1024
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 if (arm_address_register_rtx_p (index, strict_p)
5683 && (GET_MODE_SIZE (mode) <= 4))
5686 if (mode == DImode || mode == DFmode)
5688 if (code == CONST_INT)
5690 HOST_WIDE_INT val = INTVAL (index);
5691 /* ??? Can we assume ldrd for thumb2? */
5692 /* Thumb-2 ldrd only has reg+const addressing modes. */
5693 /* ldrd supports offsets of +-1020.
5694 However the ldr fallback does not. */
5695 return val > -256 && val < 256 && (val & 3) == 0;
5703 rtx xiop0 = XEXP (index, 0);
5704 rtx xiop1 = XEXP (index, 1);
5706 return ((arm_address_register_rtx_p (xiop0, strict_p)
5707 && thumb2_index_mul_operand (xiop1))
5708 || (arm_address_register_rtx_p (xiop1, strict_p)
5709 && thumb2_index_mul_operand (xiop0)));
5711 else if (code == ASHIFT)
5713 rtx op = XEXP (index, 1);
5715 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5716 && GET_CODE (op) == CONST_INT
5718 && INTVAL (op) <= 3);
5721 return (code == CONST_INT
5722 && INTVAL (index) < 4096
5723 && INTVAL (index) > -256);
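/* i.e. the asymmetric [-255, 4095] byte range of the 32-bit Thumb-2
   ldr/str encodings: a 12-bit immediate for positive offsets but only
   an 8-bit immediate for negative ones.  */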
5726 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5728 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5732 if (GET_CODE (x) != REG)
5738 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5740 return (regno <= LAST_LO_REGNUM
5741 || regno > LAST_VIRTUAL_REGISTER
5742 || regno == FRAME_POINTER_REGNUM
5743 || (GET_MODE_SIZE (mode) >= 4
5744 && (regno == STACK_POINTER_REGNUM
5745 || regno >= FIRST_PSEUDO_REGISTER
5746 || x == hard_frame_pointer_rtx
5747 || x == arg_pointer_rtx)));
5750 /* Return nonzero if x is a legitimate index register. This is the case
5751 for any base register that can access a QImode object. */
5753 thumb1_index_register_rtx_p (rtx x, int strict_p)
5755 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5758 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5760 The AP may be eliminated to either the SP or the FP, so we use the
5761 least common denominator, e.g. SImode, and offsets from 0 to 64.
5763 ??? Verify whether the above is the right approach.
5765 ??? Also, the FP may be eliminated to the SP, so perhaps that
5766 needs special handling also.
5768 ??? Look at how the mips16 port solves this problem. It probably uses
5769 better ways to solve some of these problems.
5771 Although it is not incorrect, we don't accept QImode and HImode
5772 addresses based on the frame pointer or arg pointer until the
5773 reload pass starts. This is so that eliminating such addresses
5774 into stack based ones won't produce impossible code. */
5776 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 /* ??? Not clear if this is right. Experiment. */
5779 if (GET_MODE_SIZE (mode) < 4
5780 && !(reload_in_progress || reload_completed)
5781 && (reg_mentioned_p (frame_pointer_rtx, x)
5782 || reg_mentioned_p (arg_pointer_rtx, x)
5783 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5784 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5785 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5786 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5789 /* Accept any base register. SP only in SImode or larger. */
5790 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5793 /* This is PC relative data before arm_reorg runs. */
5794 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5795 && GET_CODE (x) == SYMBOL_REF
5796 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5799 /* This is PC relative data after arm_reorg runs. */
5800 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5802 && (GET_CODE (x) == LABEL_REF
5803 || (GET_CODE (x) == CONST
5804 && GET_CODE (XEXP (x, 0)) == PLUS
5805 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5809 /* Post-inc indexing only supported for SImode and larger. */
5810 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5811 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5814 else if (GET_CODE (x) == PLUS)
5816 /* REG+REG address can be any two index registers. */
5817 /* We disallow FRAME+REG addressing since we know that FRAME
5818 will be replaced with STACK, and SP relative addressing only
5819 permits SP+OFFSET. */
5820 if (GET_MODE_SIZE (mode) <= 4
5821 && XEXP (x, 0) != frame_pointer_rtx
5822 && XEXP (x, 1) != frame_pointer_rtx
5823 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5824 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5825 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5828 /* REG+const has 5-7 bit offset for non-SP registers. */
5829 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5830 || XEXP (x, 0) == arg_pointer_rtx)
5831 && GET_CODE (XEXP (x, 1)) == CONST_INT
5832 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5835 /* REG+const has 10-bit offset for SP, but only SImode and
5836 larger are supported. */
5837 /* ??? Should probably check for DI/DFmode overflow here
5838 just like GO_IF_LEGITIMATE_OFFSET does. */
5839 else if (GET_CODE (XEXP (x, 0)) == REG
5840 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5841 && GET_MODE_SIZE (mode) >= 4
5842 && GET_CODE (XEXP (x, 1)) == CONST_INT
5843 && INTVAL (XEXP (x, 1)) >= 0
5844 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5845 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5848 else if (GET_CODE (XEXP (x, 0)) == REG
5849 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5850 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5851 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5852 && REGNO (XEXP (x, 0))
5853 <= LAST_VIRTUAL_POINTER_REGISTER))
5854 && GET_MODE_SIZE (mode) >= 4
5855 && GET_CODE (XEXP (x, 1)) == CONST_INT
5856 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5860 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5861 && GET_MODE_SIZE (mode) == 4
5862 && GET_CODE (x) == SYMBOL_REF
5863 && CONSTANT_POOL_ADDRESS_P (x)
5865 && symbol_mentioned_p (get_pool_constant (x))
5866 && ! pcrel_constant_p (get_pool_constant (x))))
5872 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5873 instruction of mode MODE. */
5875 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5877 switch (GET_MODE_SIZE (mode))
5880 return val >= 0 && val < 32;
5883 return val >= 0 && val < 64 && (val & 1) == 0;
5887 && (val + GET_MODE_SIZE (mode)) <= 128
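/* Hence the largest usable offsets are #31 for byte, #62 for halfword
   and #124 for word accesses, matching the scaled 5-bit immediate
   fields of the 16-bit ldrb/ldrh/ldr encodings.  */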
5893 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5896 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5897 else if (TARGET_THUMB2)
5898 return thumb2_legitimate_address_p (mode, x, strict_p);
5899 else /* if (TARGET_THUMB1) */
5900 return thumb1_legitimate_address_p (mode, x, strict_p);
5903 /* Build the SYMBOL_REF for __tls_get_addr. */
5905 static GTY(()) rtx tls_get_addr_libfunc;
5908 get_tls_get_addr (void)
5910 if (!tls_get_addr_libfunc)
5911 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5912 return tls_get_addr_libfunc;
5916 arm_load_tp (rtx target)
5919 target = gen_reg_rtx (SImode);
5923 /* Can return in any reg. */
5924 emit_insn (gen_load_tp_hard (target));
5928 /* Always returned in r0. Immediately copy the result into a pseudo,
5929 otherwise other uses of r0 (e.g. setting up function arguments) may
5930 clobber the value. */
5934 emit_insn (gen_load_tp_soft ());
5936 tmp = gen_rtx_REG (SImode, 0);
5937 emit_move_insn (target, tmp);
5943 load_tls_operand (rtx x, rtx reg)
5947 if (reg == NULL_RTX)
5948 reg = gen_reg_rtx (SImode);
5950 tmp = gen_rtx_CONST (SImode, x);
5952 emit_move_insn (reg, tmp);
5958 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5960 rtx insns, label, labelno, sum;
5964 labelno = GEN_INT (pic_labelno++);
5965 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5966 label = gen_rtx_CONST (VOIDmode, label);
5968 sum = gen_rtx_UNSPEC (Pmode,
5969 gen_rtvec (4, x, GEN_INT (reloc), label,
5970 GEN_INT (TARGET_ARM ? 8 : 4)),
5972 reg = load_tls_operand (sum, reg);
5975 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5976 else if (TARGET_THUMB2)
5977 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5978 else /* TARGET_THUMB1 */
5979 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5981 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5982 Pmode, 1, reg, Pmode);
5984 insns = get_insns ();
5991 legitimize_tls_address (rtx x, rtx reg)
5993 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5994 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5998 case TLS_MODEL_GLOBAL_DYNAMIC:
5999 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6000 dest = gen_reg_rtx (Pmode);
6001 emit_libcall_block (insns, dest, ret, x);
6004 case TLS_MODEL_LOCAL_DYNAMIC:
6005 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6007 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6008 share the LDM result with other LD model accesses. */
6009 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6011 dest = gen_reg_rtx (Pmode);
6012 emit_libcall_block (insns, dest, ret, eqv);
6014 /* Load the addend. */
6015 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6017 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6018 return gen_rtx_PLUS (Pmode, dest, addend);
6020 case TLS_MODEL_INITIAL_EXEC:
6021 labelno = GEN_INT (pic_labelno++);
6022 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6023 label = gen_rtx_CONST (VOIDmode, label);
6024 sum = gen_rtx_UNSPEC (Pmode,
6025 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6026 GEN_INT (TARGET_ARM ? 8 : 4)),
6028 reg = load_tls_operand (sum, reg);
6031 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6032 else if (TARGET_THUMB2)
6033 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6036 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6037 emit_move_insn (reg, gen_const_mem (SImode, reg));
6040 tp = arm_load_tp (NULL_RTX);
6042 return gen_rtx_PLUS (Pmode, tp, reg);
6044 case TLS_MODEL_LOCAL_EXEC:
6045 tp = arm_load_tp (NULL_RTX);
6047 reg = gen_rtx_UNSPEC (Pmode,
6048 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6050 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6052 return gen_rtx_PLUS (Pmode, tp, reg);
6059 /* Try machine-dependent ways of modifying an illegitimate address
6060 to be legitimate. If we find one, return the new, valid address. */
6062 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6066 /* TODO: legitimize_address for Thumb2. */
6069 return thumb_legitimize_address (x, orig_x, mode);
6072 if (arm_tls_symbol_p (x))
6073 return legitimize_tls_address (x, NULL_RTX);
6075 if (GET_CODE (x) == PLUS)
6077 rtx xop0 = XEXP (x, 0);
6078 rtx xop1 = XEXP (x, 1);
6080 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6081 xop0 = force_reg (SImode, xop0);
6083 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6084 xop1 = force_reg (SImode, xop1);
6086 if (ARM_BASE_REGISTER_RTX_P (xop0)
6087 && GET_CODE (xop1) == CONST_INT)
6089 HOST_WIDE_INT n, low_n;
6093 /* VFP addressing modes actually allow greater offsets, but for
6094 now we just stick with the lowest common denominator. */
6096 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6108 low_n = ((mode) == TImode ? 0
6109 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6113 base_reg = gen_reg_rtx (SImode);
6114 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6115 emit_move_insn (base_reg, val);
6116 x = plus_constant (base_reg, low_n);
6118 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6119 x = gen_rtx_PLUS (SImode, xop0, xop1);
6122 /* XXX We don't allow MINUS any more -- see comment in
6123 arm_legitimate_address_outer_p (). */
6124 else if (GET_CODE (x) == MINUS)
6126 rtx xop0 = XEXP (x, 0);
6127 rtx xop1 = XEXP (x, 1);
6129 if (CONSTANT_P (xop0))
6130 xop0 = force_reg (SImode, xop0);
6132 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6133 xop1 = force_reg (SImode, xop1);
6135 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6136 x = gen_rtx_MINUS (SImode, xop0, xop1);
6139 /* Make sure to take full advantage of the pre-indexed addressing mode
6140 with absolute addresses which often allows for the base register to
6141 be factorized for multiple adjacent memory references, and it might
6142 even allow for the minipool to be avoided entirely. */
6143 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6146 HOST_WIDE_INT mask, base, index;
6149 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6150 use an 8-bit index. So let's use a 12-bit index for SImode only and
6151 hope that arm_gen_constant will enable ldrb to use more bits. */
6152 bits = (mode == SImode) ? 12 : 8;
6153 mask = (1 << bits) - 1;
6154 base = INTVAL (x) & ~mask;
6155 index = INTVAL (x) & mask;
6156 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6158 /* It'll most probably be more efficient to generate the base
6159 with more bits set and use a negative index instead. */
6163 base_reg = force_reg (SImode, GEN_INT (base));
6164 x = plus_constant (base_reg, index);
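/* Illustrative case: a SImode load from absolute address 0x1234 gives
   mask == 0xfff, base == 0x1000 and index == 0x234; bit_count of the
   base is 1, so the negative-index path above is not taken, and nearby
   absolute references can then share the 0x1000 base register.  */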
6169 /* We need to find and carefully transform any SYMBOL and LABEL
6170 references, so go back to the original address expression. */
6171 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6173 if (new_x != orig_x)
6181 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6182 to be legitimate. If we find one, return the new, valid address. */
6184 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6186 if (arm_tls_symbol_p (x))
6187 return legitimize_tls_address (x, NULL_RTX);
6189 if (GET_CODE (x) == PLUS
6190 && GET_CODE (XEXP (x, 1)) == CONST_INT
6191 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6192 || INTVAL (XEXP (x, 1)) < 0))
6194 rtx xop0 = XEXP (x, 0);
6195 rtx xop1 = XEXP (x, 1);
6196 HOST_WIDE_INT offset = INTVAL (xop1);
6198 /* Try to fold the offset into a biasing of the base register and
6199 then offsetting that. Don't do this when optimizing for space
6200 since it can cause too many CSEs. */
6201 if (optimize_size && offset >= 0
6202 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6204 HOST_WIDE_INT delta;
6207 delta = offset - (256 - GET_MODE_SIZE (mode));
6208 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6209 delta = 31 * GET_MODE_SIZE (mode);
6211 delta = offset & (~31 * GET_MODE_SIZE (mode));
6213 xop0 = force_operand (plus_constant (xop0, offset - delta),
6215 x = plus_constant (xop0, delta);
6217 else if (offset < 0 && offset > -256)
6218 /* Small negative offsets are best done with a subtract before the
6219 dereference; forcing these into a register normally takes two instructions. */
6221 x = force_operand (x, NULL_RTX);
6224 /* For the remaining cases, force the constant into a register. */
6225 xop1 = force_reg (SImode, xop1);
6226 x = gen_rtx_PLUS (SImode, xop0, xop1);
6229 else if (GET_CODE (x) == PLUS
6230 && s_register_operand (XEXP (x, 1), SImode)
6231 && !s_register_operand (XEXP (x, 0), SImode))
6233 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6235 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6240 /* We need to find and carefully transform any SYMBOL and LABEL
6241 references, so go back to the original address expression. */
6242 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6244 if (new_x != orig_x)
6252 arm_legitimize_reload_address (rtx *p,
6253 enum machine_mode mode,
6254 int opnum, int type,
6255 int ind_levels ATTRIBUTE_UNUSED)
6257 if (GET_CODE (*p) == PLUS
6258 && GET_CODE (XEXP (*p, 0)) == REG
6259 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6260 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6262 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6263 HOST_WIDE_INT low, high;
6265 /* Detect coprocessor load/stores. */
6266 bool coproc_p = ((TARGET_HARD_FLOAT
6267 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6268 && (mode == SFmode || mode == DFmode
6269 || (mode == DImode && TARGET_MAVERICK)))
6270 || (TARGET_REALLY_IWMMXT
6271 && VALID_IWMMXT_REG_MODE (mode))
6273 && (VALID_NEON_DREG_MODE (mode)
6274 || VALID_NEON_QREG_MODE (mode))));
6276 /* For some conditions, bail out when the lower two bits are set, i.e. the address is not word-aligned. */
6277 if ((val & 0x3) != 0
6278 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6280 /* For DI, and DF under soft-float: */
6281 || ((mode == DImode || mode == DFmode)
6282 /* Without ldrd, we use stm/ldm, which does not
6283 fare well with unaligned bits. */
6284 && (! TARGET_LDRD
6285 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6286 || TARGET_THUMB2))))
6289 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6290 where the (reg+high) part gets turned into a reload add insn,
6291 we try to decompose the index into high/low values that can often
6292 also lead to better reload CSE.
6294 ldr r0, [r2, #4100] // Offset too large
6295 ldr r1, [r2, #4104] // Offset too large
6297 is best reloaded as:
6303 which post-reload CSE can simplify in most cases to eliminate the
6304 second add instruction:
6309 The idea here is that we want to split out the bits of the constant
6310 as a mask, rather than by subtracting the maximum offset that the
6311 respective type of load/store used can handle.
6313 A negative low part can still be used even if
6314 the overall offset is positive; sometimes this may lead to an immediate
6315 that can be constructed with fewer instructions.
6317 ldr r0, [r2, #0x3FFFFC]
6319 This is best reloaded as:
6320 add t1, r2, #0x400000
6323 The trick for spotting this for a load insn with N bits of offset
6324 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6325 negative offset that is going to make bit N and all the bits below
6326 it become zero in the remainder part.
6328 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6329 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6330 used in most cases of ARM load/store instructions. */
6332 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6333 (((VAL) & ((1 << (N)) - 1)) \
6334 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6335 : 0)
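/* A worked example of the macro above: for VAL = 0x3FFFFC and
   N = 12, VAL & 0x1FFF = 0x1FFC, XORing with 0x1000 gives 0x0FFC,
   and subtracting 0x1000 yields low = -4; the high part is then
   0x3FFFFC - (-4) = 0x400000, matching the add/ldr sequence shown
   earlier. */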
6337 if (coproc_p)
6338 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6339 else if (GET_MODE_SIZE (mode) == 8)
6341 if (TARGET_LDRD)
6342 low = (TARGET_THUMB2
6343 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6344 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6345 else
6346 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6347 to access doublewords. The supported load/store offsets are
6348 -8, -4, and 4, which we try to produce here. */
6349 low = ((val & 0xf) ^ 0x8) - 0x8;
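/* For example (illustrative): a low nibble of 0xC maps to
   (0xC ^ 0x8) - 0x8 = -4, while 0x4 stays at +4, so the low part
   is the low nibble sign-extended into the range [-8, 7]. */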
6351 else if (GET_MODE_SIZE (mode) < 8)
6353 /* NEON element load/stores do not have an offset. */
6354 if (TARGET_NEON_FP16 && mode == HFmode)
6359 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6360 Try the wider 12-bit range first, and re-try if the result
6361 is out of range. */
6362 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6363 if (low < -255)
6364 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6368 if (mode == HImode || mode == HFmode)
6371 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6374 /* The storehi/movhi_bytes fallbacks can use only
6375 [-4094,+4094] of the full ldrb/strb index range. */
6376 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6377 if (low == 4095 || low == -4095)
6382 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6388 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6389 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6390 - (unsigned HOST_WIDE_INT) 0x80000000);
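/* The XOR/subtract pair above is the usual trick for sign-extending
   a 32-bit quantity held in a HOST_WIDE_INT: e.g. (val - low) =
   0xFFFFF000 becomes (0xFFFFF000 ^ 0x80000000) - 0x80000000 =
   0x7FFFF000 - 0x80000000 = -0x1000. */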
6391 /* Check for overflow or zero. */
6392 if (low == 0 || high == 0 || (high + low != val))
6395 /* Reload the high part into a base reg; leave the low part
6397 *p = gen_rtx_PLUS (GET_MODE (*p),
6398 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6399 GEN_INT (high)),
6400 GEN_INT (low));
6401 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6402 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6403 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6411 thumb_legitimize_reload_address (rtx *x_p,
6412 enum machine_mode mode,
6413 int opnum, int type,
6414 int ind_levels ATTRIBUTE_UNUSED)
6418 if (GET_CODE (x) == PLUS
6419 && GET_MODE_SIZE (mode) < 4
6420 && REG_P (XEXP (x, 0))
6421 && XEXP (x, 0) == stack_pointer_rtx
6422 && GET_CODE (XEXP (x, 1)) == CONST_INT
6423 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6428 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6429 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6433 /* If both registers are hi-regs, then it's better to reload the
6434 entire expression rather than each register individually. That
6435 only requires one reload register rather than two. */
6436 if (GET_CODE (x) == PLUS
6437 && REG_P (XEXP (x, 0))
6438 && REG_P (XEXP (x, 1))
6439 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6440 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6445 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6446 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6453 /* Test for various thread-local symbols. */
6455 /* Return TRUE if X is a thread-local symbol. */
6458 arm_tls_symbol_p (rtx x)
6460 if (! TARGET_HAVE_TLS)
6461 return false;
6463 if (GET_CODE (x) != SYMBOL_REF)
6464 return false;
6466 return SYMBOL_REF_TLS_MODEL (x) != 0;
6469 /* Helper for arm_tls_referenced_p. */
6472 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6474 if (GET_CODE (*x) == SYMBOL_REF)
6475 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6477 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6478 TLS offsets, not real symbol references. */
6479 if (GET_CODE (*x) == UNSPEC
6480 && XINT (*x, 1) == UNSPEC_TLS)
6486 /* Return TRUE if X contains any TLS symbol references. */
6489 arm_tls_referenced_p (rtx x)
6491 if (! TARGET_HAVE_TLS)
6492 return false;
6494 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6497 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6500 arm_cannot_force_const_mem (rtx x)
6504 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6506 split_const (x, &base, &offset);
6507 if (GET_CODE (base) == SYMBOL_REF
6508 && !offset_within_block_p (base, INTVAL (offset)))
6511 return arm_tls_referenced_p (x);
6514 #define REG_OR_SUBREG_REG(X) \
6515 (GET_CODE (X) == REG \
6516 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6518 #define REG_OR_SUBREG_RTX(X) \
6519 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6522 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6524 enum machine_mode mode = GET_MODE (x);
6538 return COSTS_N_INSNS (1);
6541 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6544 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6551 return COSTS_N_INSNS (2) + cycles;
6553 return COSTS_N_INSNS (1) + 16;
6556 return (COSTS_N_INSNS (1)
6557 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6558 + (GET_CODE (SET_DEST (x)) == MEM)));
6563 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6565 if (thumb_shiftable_const (INTVAL (x)))
6566 return COSTS_N_INSNS (2);
6567 return COSTS_N_INSNS (3);
6569 else if ((outer == PLUS || outer == COMPARE)
6570 && INTVAL (x) < 256 && INTVAL (x) > -256)
6572 else if ((outer == IOR || outer == XOR || outer == AND)
6573 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6574 return COSTS_N_INSNS (1);
6575 else if (outer == AND)
6578 /* This duplicates the tests in the andsi3 expander. */
6579 for (i = 9; i <= 31; i++)
6580 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6581 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6582 return COSTS_N_INSNS (2);
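/* E.g. (illustrative): an AND with 0xffff ((1 << 16) - 1) can be
   synthesized as a left shift by 16 followed by a logical right
   shift by 16, hence the two-instruction cost. */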
6584 else if (outer == ASHIFT || outer == ASHIFTRT
6585 || outer == LSHIFTRT)
6587 return COSTS_N_INSNS (2);
6593 return COSTS_N_INSNS (3);
6611 /* XXX another guess. */
6612 /* Memory costs quite a lot for the first word, but subsequent words
6613 load at the equivalent of a single insn each. */
6614 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6615 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6620 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6626 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6627 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6633 return total + COSTS_N_INSNS (1);
6635 /* Assume a two-shift sequence. Increase the cost slightly so
6636 we prefer actual shifts over an extend operation. */
6637 return total + 1 + COSTS_N_INSNS (2);
6645 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6647 enum machine_mode mode = GET_MODE (x);
6648 enum rtx_code subcode;
6650 enum rtx_code code = GET_CODE (x);
6656 /* Memory costs quite a lot for the first word, but subsequent words
6657 load at the equivalent of a single insn each. */
6658 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6665 if (TARGET_HARD_FLOAT && mode == SFmode)
6666 *total = COSTS_N_INSNS (2);
6667 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6668 *total = COSTS_N_INSNS (4);
6670 *total = COSTS_N_INSNS (20);
6674 if (GET_CODE (XEXP (x, 1)) == REG)
6675 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
6676 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6677 *total = rtx_cost (XEXP (x, 1), code, speed);
6683 *total += COSTS_N_INSNS (4);
6688 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6689 *total += rtx_cost (XEXP (x, 0), code, speed);
6692 *total += COSTS_N_INSNS (3);
6696 *total += COSTS_N_INSNS (1);
6697 /* Increase the cost of complex shifts because they aren't any faster,
6698 and they reduce dual issue opportunities. */
6699 if (arm_tune_cortex_a9
6700 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6708 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6709 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6710 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6712 *total += rtx_cost (XEXP (x, 1), code, speed);
6716 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6717 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6719 *total += rtx_cost (XEXP (x, 0), code, speed);
6726 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6728 if (TARGET_HARD_FLOAT
6730 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6732 *total = COSTS_N_INSNS (1);
6733 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6734 && arm_const_double_rtx (XEXP (x, 0)))
6736 *total += rtx_cost (XEXP (x, 1), code, speed);
6740 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6741 && arm_const_double_rtx (XEXP (x, 1)))
6743 *total += rtx_cost (XEXP (x, 0), code, speed);
6749 *total = COSTS_N_INSNS (20);
6753 *total = COSTS_N_INSNS (1);
6754 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6755 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6757 *total += rtx_cost (XEXP (x, 1), code, speed);
6761 subcode = GET_CODE (XEXP (x, 1));
6762 if (subcode == ASHIFT || subcode == ASHIFTRT
6763 || subcode == LSHIFTRT
6764 || subcode == ROTATE || subcode == ROTATERT)
6766 *total += rtx_cost (XEXP (x, 0), code, speed);
6767 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6771 /* A shift as a part of RSB costs no more than RSB itself. */
6772 if (GET_CODE (XEXP (x, 0)) == MULT
6773 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6775 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6776 *total += rtx_cost (XEXP (x, 1), code, speed);
6780 if (subcode == MULT
6781 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6783 *total += rtx_cost (XEXP (x, 0), code, speed);
6784 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6788 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6789 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6791 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6792 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6793 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6794 *total += COSTS_N_INSNS (1);
6802 if (code == PLUS && arm_arch6 && mode == SImode
6803 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6804 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6806 *total = COSTS_N_INSNS (1);
6807 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6809 *total += rtx_cost (XEXP (x, 1), code, speed);
6813 /* MLA: All arguments must be registers. We filter out
6814 multiplication by a power of two, so that we fall down into
6815 the code below. */
6816 if (GET_CODE (XEXP (x, 0)) == MULT
6817 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6819 /* The cost comes from the cost of the multiply. */
6823 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6825 if (TARGET_HARD_FLOAT
6827 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6829 *total = COSTS_N_INSNS (1);
6830 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6831 && arm_const_double_rtx (XEXP (x, 1)))
6833 *total += rtx_cost (XEXP (x, 0), code, speed);
6840 *total = COSTS_N_INSNS (20);
6844 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6845 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6847 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6848 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6849 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6850 *total += COSTS_N_INSNS (1);
6856 case AND: case XOR: case IOR:
6858 /* Normally the frame registers will be split into reg+const during
6859 reload, so it is a bad idea to combine them with other instructions,
6860 since then they might not be moved outside of loops. As a compromise
6861 we allow integration with ops that have a constant as their second
6863 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6864 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6865 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6866 *total = COSTS_N_INSNS (1);
6870 *total += COSTS_N_INSNS (2);
6871 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6872 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6874 *total += rtx_cost (XEXP (x, 0), code, speed);
6881 *total += COSTS_N_INSNS (1);
6882 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6883 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6885 *total += rtx_cost (XEXP (x, 0), code, speed);
6888 subcode = GET_CODE (XEXP (x, 0));
6889 if (subcode == ASHIFT || subcode == ASHIFTRT
6890 || subcode == LSHIFTRT
6891 || subcode == ROTATE || subcode == ROTATERT)
6893 *total += rtx_cost (XEXP (x, 1), code, speed);
6894 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6898 if (subcode == MULT
6899 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6901 *total += rtx_cost (XEXP (x, 1), code, speed);
6902 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6906 if (subcode == UMIN || subcode == UMAX
6907 || subcode == SMIN || subcode == SMAX)
6909 *total = COSTS_N_INSNS (3);
6916 /* This should have been handled by the CPU specific routines. */
6920 if (arm_arch3m && mode == SImode
6921 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6922 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6923 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6924 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6925 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6926 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6928 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6931 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6935 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6937 if (TARGET_HARD_FLOAT
6939 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6941 *total = COSTS_N_INSNS (1);
6944 *total = COSTS_N_INSNS (2);
6950 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6951 if (mode == SImode && code == NOT)
6953 subcode = GET_CODE (XEXP (x, 0));
6954 if (subcode == ASHIFT || subcode == ASHIFTRT
6955 || subcode == LSHIFTRT
6956 || subcode == ROTATE || subcode == ROTATERT
6957 || (subcode == MULT
6958 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6960 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6961 /* Register shifts cost an extra cycle. */
6962 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6963 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6972 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6974 *total = COSTS_N_INSNS (4);
6978 operand = XEXP (x, 0);
6980 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6981 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6982 && GET_CODE (XEXP (operand, 0)) == REG
6983 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6984 *total += COSTS_N_INSNS (1);
6985 *total += (rtx_cost (XEXP (x, 1), code, speed)
6986 + rtx_cost (XEXP (x, 2), code, speed));
6990 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6992 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6998 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6999 && mode == SImode && XEXP (x, 1) == const0_rtx)
7001 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7007 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7008 && mode == SImode && XEXP (x, 1) == const0_rtx)
7010 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7030 /* SCC insns. If the comparison has already been performed, they
7031 cost 2 instructions. Otherwise they need an additional
7032 comparison before them. */
7033 *total = COSTS_N_INSNS (2);
7034 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7041 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7047 *total += COSTS_N_INSNS (1);
7048 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7049 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7051 *total += rtx_cost (XEXP (x, 0), code, speed);
7055 subcode = GET_CODE (XEXP (x, 0));
7056 if (subcode == ASHIFT || subcode == ASHIFTRT
7057 || subcode == LSHIFTRT
7058 || subcode == ROTATE || subcode == ROTATERT)
7060 *total += rtx_cost (XEXP (x, 1), code, speed);
7061 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7066 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7068 *total += rtx_cost (XEXP (x, 1), code, speed);
7069 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7079 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7080 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7081 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7082 *total += rtx_cost (XEXP (x, 1), code, speed);
7086 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7088 if (TARGET_HARD_FLOAT
7090 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7092 *total = COSTS_N_INSNS (1);
7095 *total = COSTS_N_INSNS (20);
7098 *total = COSTS_N_INSNS (1);
7100 *total += COSTS_N_INSNS (3);
7106 if (GET_MODE_CLASS (mode) == MODE_INT)
7108 rtx op = XEXP (x, 0);
7109 enum machine_mode opmode = GET_MODE (op);
7112 *total += COSTS_N_INSNS (1);
7114 if (opmode != SImode)
7118 /* If !arm_arch4, we use one of the extendhisi2_mem
7119 or movhi_bytes patterns for HImode. For a QImode
7120 sign extension, we first zero-extend from memory
7121 and then perform a shift sequence. */
7122 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7123 *total += COSTS_N_INSNS (2);
7126 *total += COSTS_N_INSNS (1);
7128 /* We don't have the necessary insn, so we need to perform some
7130 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7131 /* An and with constant 255. */
7132 *total += COSTS_N_INSNS (1);
7134 /* A shift sequence. Increase costs slightly to avoid
7135 combining two shifts into an extend operation. */
7136 *total += COSTS_N_INSNS (2) + 1;
7142 switch (GET_MODE (XEXP (x, 0)))
7149 *total = COSTS_N_INSNS (1);
7159 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7163 if (const_ok_for_arm (INTVAL (x))
7164 || const_ok_for_arm (~INTVAL (x)))
7165 *total = COSTS_N_INSNS (1);
7167 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7168 INTVAL (x), NULL_RTX,
7175 *total = COSTS_N_INSNS (3);
7179 *total = COSTS_N_INSNS (1);
7183 *total = COSTS_N_INSNS (1);
7184 *total += rtx_cost (XEXP (x, 0), code, speed);
7188 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7189 && (mode == SFmode || !TARGET_VFP_SINGLE))
7190 *total = COSTS_N_INSNS (1);
7192 *total = COSTS_N_INSNS (4);
7196 *total = COSTS_N_INSNS (4);
7201 /* Estimates the size cost of thumb1 instructions.
7202 For now most of the code is copied from thumb1_rtx_costs. We need more
7203 fine-grained tuning when we have more related test cases.
7205 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7207 enum machine_mode mode = GET_MODE (x);
7220 return COSTS_N_INSNS (1);
7223 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7225 /* The Thumb-1 mul instruction can't operate on a constant; we must
7226 load it into a register first. */
7227 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7228 return COSTS_N_INSNS (1) + const_size;
7230 return COSTS_N_INSNS (1);
7233 return (COSTS_N_INSNS (1)
7234 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7235 + (GET_CODE (SET_DEST (x)) == MEM)));
7240 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7241 return COSTS_N_INSNS (1);
7242 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7243 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7244 return COSTS_N_INSNS (2);
7245 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7246 if (thumb_shiftable_const (INTVAL (x)))
7247 return COSTS_N_INSNS (2);
7248 return COSTS_N_INSNS (3);
7250 else if ((outer == PLUS || outer == COMPARE)
7251 && INTVAL (x) < 256 && INTVAL (x) > -256)
7253 else if ((outer == IOR || outer == XOR || outer == AND)
7254 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7255 return COSTS_N_INSNS (1);
7256 else if (outer == AND)
7259 /* This duplicates the tests in the andsi3 expander. */
7260 for (i = 9; i <= 31; i++)
7261 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7262 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7263 return COSTS_N_INSNS (2);
7265 else if (outer == ASHIFT || outer == ASHIFTRT
7266 || outer == LSHIFTRT)
7268 return COSTS_N_INSNS (2);
7274 return COSTS_N_INSNS (3);
7292 /* XXX another guess. */
7293 /* Memory costs quite a lot for the first word, but subsequent words
7294 load at the equivalent of a single insn each. */
7295 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7296 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7301 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7306 /* XXX still guessing. */
7307 switch (GET_MODE (XEXP (x, 0)))
7310 return (1 + (mode == DImode ? 4 : 0)
7311 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7314 return (4 + (mode == DImode ? 4 : 0)
7315 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7318 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7329 /* RTX costs when optimizing for size. */
7331 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7334 enum machine_mode mode = GET_MODE (x);
7337 *total = thumb1_size_rtx_costs (x, code, outer_code);
7341 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7345 /* A memory access costs 1 insn if the mode is small, or the address is
7346 a single register; otherwise it costs one insn per word. */
7347 if (REG_P (XEXP (x, 0)))
7348 *total = COSTS_N_INSNS (1);
7350 && GET_CODE (XEXP (x, 0)) == PLUS
7351 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7352 /* This will be split into two instructions.
7353 See arm.md:calculate_pic_address. */
7354 *total = COSTS_N_INSNS (2);
7356 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7363 /* Needs a libcall, so it costs about this. */
7364 *total = COSTS_N_INSNS (2);
7368 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7370 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7378 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7380 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7383 else if (mode == SImode)
7385 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7386 /* Slightly disparage register shifts, but not by much. */
7387 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7388 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7392 /* Needs a libcall. */
7393 *total = COSTS_N_INSNS (2);
7397 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7398 && (mode == SFmode || !TARGET_VFP_SINGLE))
7400 *total = COSTS_N_INSNS (1);
7406 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7407 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7409 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7410 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7411 || subcode1 == ROTATE || subcode1 == ROTATERT
7412 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7413 || subcode1 == ASHIFTRT)
7415 /* It's just the cost of the two operands. */
7420 *total = COSTS_N_INSNS (1);
7424 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7428 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7429 && (mode == SFmode || !TARGET_VFP_SINGLE))
7431 *total = COSTS_N_INSNS (1);
7435 /* A shift as a part of ADD costs nothing. */
7436 if (GET_CODE (XEXP (x, 0)) == MULT
7437 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7439 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7440 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7441 *total += rtx_cost (XEXP (x, 1), code, false);
7446 case AND: case XOR: case IOR:
7449 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7451 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7452 || subcode == LSHIFTRT || subcode == ASHIFTRT
7453 || (code == AND && subcode == NOT))
7455 /* It's just the cost of the two operands. */
7461 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7465 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7469 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7470 && (mode == SFmode || !TARGET_VFP_SINGLE))
7472 *total = COSTS_N_INSNS (1);
7478 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7487 if (cc_register (XEXP (x, 0), VOIDmode))
7490 *total = COSTS_N_INSNS (1);
7494 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7495 && (mode == SFmode || !TARGET_VFP_SINGLE))
7496 *total = COSTS_N_INSNS (1);
7498 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7503 return arm_rtx_costs_1 (x, outer_code, total, 0);
7506 if (const_ok_for_arm (INTVAL (x)))
7507 /* A multiplication by a constant requires another instruction
7508 to load the constant to a register. */
7509 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7511 else if (const_ok_for_arm (~INTVAL (x)))
7512 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7513 else if (const_ok_for_arm (-INTVAL (x)))
7515 if (outer_code == COMPARE || outer_code == PLUS
7516 || outer_code == MINUS)
7519 *total = COSTS_N_INSNS (1);
7522 *total = COSTS_N_INSNS (2);
7528 *total = COSTS_N_INSNS (2);
7532 *total = COSTS_N_INSNS (4);
7537 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7538 cost of these slightly. */
7539 *total = COSTS_N_INSNS (1) + 1;
7543 if (mode != VOIDmode)
7544 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7546 *total = COSTS_N_INSNS (4); /* Who knows? */
7551 /* RTX costs. Dispatch to the size costs when optimizing for size, otherwise to the per-core tuned costs. */
7553 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7557 return arm_size_rtx_costs (x, (enum rtx_code) code,
7558 (enum rtx_code) outer_code, total);
7560 return current_tune->rtx_costs (x, (enum rtx_code) code,
7561 (enum rtx_code) outer_code,
7565 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7566 supported on any "slowmul" cores, so it can be ignored. */
7569 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7570 int *total, bool speed)
7572 enum machine_mode mode = GET_MODE (x);
7576 *total = thumb1_rtx_costs (x, code, outer_code);
7583 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7586 *total = COSTS_N_INSNS (20);
7590 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7592 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7593 & (unsigned HOST_WIDE_INT) 0xffffffff);
7594 int cost, const_ok = const_ok_for_arm (i);
7595 int j, booth_unit_size;
7597 /* Tune as appropriate. */
7598 cost = const_ok ? 4 : 8;
7599 booth_unit_size = 2;
7600 for (j = 0; i && j < 32; j += booth_unit_size)
7602 i >>= booth_unit_size;
7606 *total = COSTS_N_INSNS (cost);
7607 *total += rtx_cost (XEXP (x, 0), code, speed);
7611 *total = COSTS_N_INSNS (20);
7615 return arm_rtx_costs_1 (x, outer_code, total, speed);
7620 /* RTX cost for cores with a fast multiply unit (M variants). */
7623 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7624 int *total, bool speed)
7626 enum machine_mode mode = GET_MODE (x);
7630 *total = thumb1_rtx_costs (x, code, outer_code);
7634 /* ??? Should Thumb-2 use different costs? */
7638 /* There is no point basing this on the tuning, since it is always the
7639 fast variant if it exists at all. */
7641 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7642 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7643 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7645 *total = COSTS_N_INSNS (2);
7652 *total = COSTS_N_INSNS (5);
7656 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7658 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7659 & (unsigned HOST_WIDE_INT) 0xffffffff);
7660 int cost, const_ok = const_ok_for_arm (i);
7661 int j, booth_unit_size;
7663 /* Tune as appropriate. */
7664 cost = const_ok ? 4 : 8;
7665 booth_unit_size = 8;
7666 for (j = 0; i && j < 32; j += booth_unit_size)
7668 i >>= booth_unit_size;
7672 *total = COSTS_N_INSNS (cost);
7678 *total = COSTS_N_INSNS (4);
7682 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7684 if (TARGET_HARD_FLOAT
7686 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7688 *total = COSTS_N_INSNS (1);
7693 /* Requires a library call. */
7694 *total = COSTS_N_INSNS (20);
7698 return arm_rtx_costs_1 (x, outer_code, total, speed);
7703 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7704 so it can be ignored. */
7707 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7708 int *total, bool speed)
7710 enum machine_mode mode = GET_MODE (x);
7714 *total = thumb1_rtx_costs (x, code, outer_code);
7721 if (GET_CODE (XEXP (x, 0)) != MULT)
7722 return arm_rtx_costs_1 (x, outer_code, total, speed);
7724 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7725 will stall until the multiplication is complete. */
7726 *total = COSTS_N_INSNS (3);
7730 /* There is no point basing this on the tuning, since it is always the
7731 fast variant if it exists at all. */
7733 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7734 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7735 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7737 *total = COSTS_N_INSNS (2);
7744 *total = COSTS_N_INSNS (5);
7748 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7750 /* If operand 1 is a constant we can more accurately
7751 calculate the cost of the multiply. The multiplier can
7752 retire 15 bits on the first cycle and a further 12 on the
7753 second. We do, of course, have to load the constant into
7754 a register first. */
7755 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7756 /* There's a general overhead of one cycle. */
7758 unsigned HOST_WIDE_INT masked_const;
7763 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7765 masked_const = i & 0xffff8000;
7766 if (masked_const != 0)
7769 masked_const = i & 0xf8000000;
7770 if (masked_const != 0)
7773 *total = COSTS_N_INSNS (cost);
7779 *total = COSTS_N_INSNS (3);
7783 /* Requires a library call. */
7784 *total = COSTS_N_INSNS (20);
7788 return arm_rtx_costs_1 (x, outer_code, total, speed);
7793 /* RTX costs for 9e (and later) cores. */
7796 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7797 int *total, bool speed)
7799 enum machine_mode mode = GET_MODE (x);
7806 *total = COSTS_N_INSNS (3);
7810 *total = thumb1_rtx_costs (x, code, outer_code);
7818 /* There is no point basing this on the tuning, since it is always the
7819 fast variant if it exists at all. */
7821 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7822 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7823 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7825 *total = COSTS_N_INSNS (2);
7832 *total = COSTS_N_INSNS (5);
7838 *total = COSTS_N_INSNS (2);
7842 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7844 if (TARGET_HARD_FLOAT
7846 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7848 *total = COSTS_N_INSNS (1);
7853 *total = COSTS_N_INSNS (20);
7857 return arm_rtx_costs_1 (x, outer_code, total, speed);
7860 /* All address computations that can be done are free, but rtx cost returns
7861 the same for practically all of them. So we weight the different types
7862 of address here in the order (most preferred first):
7863 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7865 arm_arm_address_cost (rtx x)
7867 enum rtx_code c = GET_CODE (x);
7869 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7871 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7876 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7879 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7889 arm_thumb_address_cost (rtx x)
7891 enum rtx_code c = GET_CODE (x);
7896 && GET_CODE (XEXP (x, 0)) == REG
7897 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7904 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7906 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7909 /* Adjust cost hook for XScale. */
7911 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7913 /* Some true dependencies can have a higher cost depending
7914 on precisely how certain input operands are used. */
7915 if (REG_NOTE_KIND(link) == 0
7916 && recog_memoized (insn) >= 0
7917 && recog_memoized (dep) >= 0)
7919 int shift_opnum = get_attr_shift (insn);
7920 enum attr_type attr_type = get_attr_type (dep);
7922 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7923 operand for INSN. If we have a shifted input operand and the
7924 instruction we depend on is another ALU instruction, then we may
7925 have to account for an additional stall. */
7926 if (shift_opnum != 0
7927 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7929 rtx shifted_operand;
7932 /* Get the shifted operand. */
7933 extract_insn (insn);
7934 shifted_operand = recog_data.operand[shift_opnum];
7936 /* Iterate over all the operands in DEP. If we write an operand
7937 that overlaps with SHIFTED_OPERAND, then we have to increase the
7938 cost of this dependency. */
7940 preprocess_constraints ();
7941 for (opno = 0; opno < recog_data.n_operands; opno++)
7943 /* We can ignore strict inputs. */
7944 if (recog_data.operand_type[opno] == OP_IN)
7947 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7959 /* Adjust cost hook for Cortex A9. */
7961 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7963 switch (REG_NOTE_KIND (link))
7970 case REG_DEP_OUTPUT:
7971 if (recog_memoized (insn) >= 0
7972 && recog_memoized (dep) >= 0)
7974 if (GET_CODE (PATTERN (insn)) == SET)
7977 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7979 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7981 enum attr_type attr_type_insn = get_attr_type (insn);
7982 enum attr_type attr_type_dep = get_attr_type (dep);
7984 /* By default all dependencies of the form
7987 have an extra latency of 1 cycle because
7988 of the input and output dependency in this
7989 case. However, this gets modeled as a true
7990 dependency, hence all these checks. */
7991 if (REG_P (SET_DEST (PATTERN (insn)))
7992 && REG_P (SET_DEST (PATTERN (dep)))
7993 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7994 SET_DEST (PATTERN (dep))))
7996 /* FMACS is a special case where the dependent
7997 instruction can be issued 3 cycles before
7998 the normal latency in case of an output
8000 if ((attr_type_insn == TYPE_FMACS
8001 || attr_type_insn == TYPE_FMACD)
8002 && (attr_type_dep == TYPE_FMACS
8003 || attr_type_dep == TYPE_FMACD))
8005 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8006 *cost = insn_default_latency (dep) - 3;
8008 *cost = insn_default_latency (dep);
8013 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8014 *cost = insn_default_latency (dep) + 1;
8016 *cost = insn_default_latency (dep);
8032 /* Adjust cost hook for FA726TE. */
8034 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8036 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8037 followed by a predicated one) has a penalty of 3. */
8038 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8039 && recog_memoized (insn) >= 0
8040 && recog_memoized (dep) >= 0
8041 && get_attr_conds (dep) == CONDS_SET)
8043 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8044 if (get_attr_conds (insn) == CONDS_USE
8045 && get_attr_type (insn) != TYPE_BRANCH)
8051 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8052 || get_attr_conds (insn) == CONDS_USE)
8062 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8063 It corrects the value of COST based on the relationship between
8064 INSN and DEP through the dependence LINK. It returns the new
8065 value. There is a per-core adjust_cost hook to adjust scheduler costs
8066 and the per-core hook can choose to completely override the generic
8067 adjust_cost function. Only put bits of code into arm_adjust_cost that
8068 are common across all cores. */
8070 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8074 /* When generating Thumb-1 code, we want to place flag-setting operations
8075 close to a conditional branch which depends on them, so that we can
8076 omit the comparison. */
8078 && REG_NOTE_KIND (link) == 0
8079 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8080 && recog_memoized (dep) >= 0
8081 && get_attr_conds (dep) == CONDS_SET)
8084 if (current_tune->sched_adjust_cost != NULL)
8086 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8090 /* XXX This is not strictly true for the FPA. */
8091 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8092 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8095 /* Call insns don't incur a stall, even if they follow a load. */
8096 if (REG_NOTE_KIND (link) == 0
8097 && GET_CODE (insn) == CALL_INSN)
8100 if ((i_pat = single_set (insn)) != NULL
8101 && GET_CODE (SET_SRC (i_pat)) == MEM
8102 && (d_pat = single_set (dep)) != NULL
8103 && GET_CODE (SET_DEST (d_pat)) == MEM)
8105 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8106 /* This is a load after a store; there is no conflict if the load reads
8107 from a cached area. Assume that loads from the stack and from the
8108 constant pool are cached, and that others will miss. This is a
8111 if ((GET_CODE (src_mem) == SYMBOL_REF
8112 && CONSTANT_POOL_ADDRESS_P (src_mem))
8113 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8114 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8115 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8122 static int fp_consts_inited = 0;
8124 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8125 static const char * const strings_fp[8] =
8128 "4", "5", "0.5", "10"
8131 static REAL_VALUE_TYPE values_fp[8];
8134 init_fp_table (void)
8139 if (TARGET_VFP)
8140 fp_consts_inited = 1;
8141 else
8142 fp_consts_inited = 8;
8144 for (i = 0; i < fp_consts_inited; i++)
8146 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8151 /* Return TRUE if rtx X is a valid immediate FP constant. */
8153 arm_const_double_rtx (rtx x)
8158 if (!fp_consts_inited)
8161 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8162 if (REAL_VALUE_MINUS_ZERO (r))
8165 for (i = 0; i < fp_consts_inited; i++)
8166 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8172 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8174 neg_const_double_rtx_ok_for_fpa (rtx x)
8179 if (!fp_consts_inited)
8182 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8183 r = real_value_negate (&r);
8184 if (REAL_VALUE_MINUS_ZERO (r))
8187 for (i = 0; i < 8; i++)
8188 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8195 /* VFPv3 has a fairly wide range of representable immediates, formed from
8196 "quarter-precision" floating-point values. These can be evaluated using this
8197 formula (with ^ for exponentiation):
8199 -1^s * n * 2^-r
8201 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8202 16 <= n <= 31 and 0 <= r <= 7.
8204 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8206 - A (most-significant) is the sign bit.
8207 - BCD are the exponent (encoded as r XOR 3).
8208 - EFGH are the mantissa (encoded as n - 16).
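/* For example (worked by hand, an illustration rather than a quote
   from the architecture manual): 1.0 = -1^0 * 16 * 2^-4, so s = 0,
   n = 16 and r = 4; that encodes as A = 0, BCD = 4 XOR 3 = 7 (111)
   and EFGH = n - 16 = 0, giving the 8-bit pattern 01110000 (0x70). */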
8211 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8212 fconst[sd] instruction, or -1 if X isn't suitable. */
8214 vfp3_const_double_index (rtx x)
8216 REAL_VALUE_TYPE r, m;
8218 unsigned HOST_WIDE_INT mantissa, mant_hi;
8219 unsigned HOST_WIDE_INT mask;
8220 HOST_WIDE_INT m1, m2;
8221 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8223 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8226 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8228 /* We can't represent these things, so detect them first. */
8229 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8232 /* Extract sign, exponent and mantissa. */
8233 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8234 r = real_value_abs (&r);
8235 exponent = REAL_EXP (&r);
8236 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8237 highest (sign) bit, with a fixed binary point at bit point_pos.
8238 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8239 bits for the mantissa, this may fail (low bits would be lost). */
8240 real_ldexp (&m, &r, point_pos - exponent);
8241 REAL_VALUE_TO_INT (&m1, &m2, m);
8245 /* If there are bits set in the low part of the mantissa, we can't
8246 represent this value. */
8250 /* Now make it so that mantissa contains the most-significant bits, and move
8251 the point_pos to indicate that the least-significant bits have been
8252 discarded. */
8253 point_pos -= HOST_BITS_PER_WIDE_INT;
8256 /* We can permit four significant bits of mantissa only, plus a high bit
8257 which is always 1. */
8258 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8259 if ((mantissa & mask) != 0)
8262 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8263 mantissa >>= point_pos - 5;
8265 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8266 floating-point immediate zero with Neon using an integer-zero load, but
8267 that case is handled elsewhere.) */
8271 gcc_assert (mantissa >= 16 && mantissa <= 31);
8273 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8274 normalized significands are in the range [1, 2). (Our mantissa is shifted
8275 left 4 places at this point relative to normalized IEEE754 values). GCC
8276 internally uses [0.5, 1) (see real.c), so the exponent returned from
8277 REAL_EXP must be altered. */
8278 exponent = 5 - exponent;
8280 if (exponent < 0 || exponent > 7)
8283 /* Sign, mantissa and exponent are now in the correct form to plug into the
8284 formula described in the comment above. */
8285 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8288 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8290 vfp3_const_double_rtx (rtx x)
8295 return vfp3_const_double_index (x) != -1;
8298 /* Recognize immediates which can be used in various Neon instructions. Legal
8299 immediates are described by the following table (for VMVN variants, the
8300 bitwise inverse of the constant shown is recognized. In either case, VMOV
8301 is output and the correct instruction to use for a given constant is chosen
8302 by the assembler). The constant shown is replicated across all elements of
8303 the destination vector.
8305 insn elems variant constant (binary)
8306 ---- ----- ------- -----------------
8307 vmov i32 0 00000000 00000000 00000000 abcdefgh
8308 vmov i32 1 00000000 00000000 abcdefgh 00000000
8309 vmov i32 2 00000000 abcdefgh 00000000 00000000
8310 vmov i32 3 abcdefgh 00000000 00000000 00000000
8311 vmov i16 4 00000000 abcdefgh
8312 vmov i16 5 abcdefgh 00000000
8313 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8314 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8315 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8316 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8317 vmvn i16 10 00000000 abcdefgh
8318 vmvn i16 11 abcdefgh 00000000
8319 vmov i32 12 00000000 00000000 abcdefgh 11111111
8320 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8321 vmov i32 14 00000000 abcdefgh 11111111 11111111
8322 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8324 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8325 eeeeeeee ffffffff gggggggg hhhhhhhh
8326 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8328 For case 18, B = !b. Representable values are exactly those accepted by
8329 vfp3_const_double_index, but are output as floating-point numbers rather
8332 Variants 0-5 (inclusive) may also be used as immediates for the second
8333 operand of VORR/VBIC instructions.
8335 The INVERSE argument causes the bitwise inverse of the given operand to be
8336 recognized instead (used for recognizing legal immediates for the VAND/VORN
8337 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8338 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8339 output, rather than the real insns vbic/vorr).
8341 INVERSE makes no difference to the recognition of float vectors.
8343 The return value is the variant of immediate as shown in the above table, or
8344 -1 if the given value doesn't match any of the listed patterns.
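/* To illustrate (example values, not exhaustive): a V4SI vector with
   every element equal to 0x000000AB matches variant 0, one with every
   element equal to 0x0000AB00 matches variant 1, and one with every
   element equal to 0xFFFFFF54 matches variant 6 (a VMVN, the inverse
   of the shown constant with abcdefgh = 0xAB). */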
8347 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8348 rtx *modconst, int *elementwidth)
8350 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8352 for (i = 0; i < idx; i += (STRIDE)) \
8357 immtype = (CLASS); \
8358 elsize = (ELSIZE); \
8362 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8363 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8364 unsigned char bytes[16];
8365 int immtype = -1, matches;
8366 unsigned int invmask = inverse ? 0xff : 0;
8368 /* Vectors of float constants. */
8369 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8371 rtx el0 = CONST_VECTOR_ELT (op, 0);
8374 if (!vfp3_const_double_rtx (el0))
8377 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8379 for (i = 1; i < n_elts; i++)
8381 rtx elt = CONST_VECTOR_ELT (op, i);
8384 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8386 if (!REAL_VALUES_EQUAL (r0, re))
8391 *modconst = CONST_VECTOR_ELT (op, 0);
8399 /* Splat vector constant out into a byte vector. */
8400 for (i = 0; i < n_elts; i++)
8402 rtx el = CONST_VECTOR_ELT (op, i);
8403 unsigned HOST_WIDE_INT elpart;
8404 unsigned int part, parts;
8406 if (GET_CODE (el) == CONST_INT)
8408 elpart = INTVAL (el);
8411 else if (GET_CODE (el) == CONST_DOUBLE)
8413 elpart = CONST_DOUBLE_LOW (el);
8419 for (part = 0; part < parts; part++)
8422 for (byte = 0; byte < innersize; byte++)
8424 bytes[idx++] = (elpart & 0xff) ^ invmask;
8425 elpart >>= BITS_PER_UNIT;
8427 if (GET_CODE (el) == CONST_DOUBLE)
8428 elpart = CONST_DOUBLE_HIGH (el);
8433 gcc_assert (idx == GET_MODE_SIZE (mode));
8437 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8438 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8440 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8441 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8443 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8444 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8446 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8447 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8449 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8451 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8453 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8454 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8456 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8457 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8459 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8460 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8462 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8463 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8465 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8467 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8469 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8470 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8472 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8473 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8475 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8476 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8478 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8479 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8481 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8483 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8484 && bytes[i] == bytes[(i + 8) % idx]);
8492 *elementwidth = elsize;
8496 unsigned HOST_WIDE_INT imm = 0;
8498 /* Un-invert bytes of recognized vector, if necessary. */
8500 for (i = 0; i < idx; i++)
8501 bytes[i] ^= invmask;
8505 /* FIXME: Broken on 32-bit H_W_I hosts. */
8506 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8508 for (i = 0; i < 8; i++)
8509 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8510 << (i * BITS_PER_UNIT);
8512 *modconst = GEN_INT (imm);
8516 unsigned HOST_WIDE_INT imm = 0;
8518 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8519 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8521 *modconst = GEN_INT (imm);
8529 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8530 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8531 float elements), and a modified constant (whatever should be output for a
8532 VMOV) in *MODCONST. */
8535 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8536 rtx *modconst, int *elementwidth)
8540 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8546 *modconst = tmpconst;
8549 *elementwidth = tmpwidth;
8554 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8555 the immediate is valid, write a constant suitable for using as an operand
8556 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8557 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8560 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8561 rtx *modconst, int *elementwidth)
8565 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8567 if (retval < 0 || retval > 5)
8571 *modconst = tmpconst;
8574 *elementwidth = tmpwidth;
8579 /* Return a string suitable for output of Neon immediate logic operation
8583 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8584 int inverse, int quad)
8586 int width, is_valid;
8587 static char templ[40];
8589 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8591 gcc_assert (is_valid != 0);
8594 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8596 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8601 /* Output a sequence of pairwise operations to implement a reduction.
8602 NOTE: We do "too much work" here, because pairwise operations work on two
8603 registers-worth of operands in one go. Unfortunately it does not seem
8604 possible to exploit those extra calculations to do the full operation in fewer steps.
8605 Although all vector elements of the result but the first are ignored, we
8606 actually calculate the same result in each of the elements. An alternative
8607 such as initially loading a vector with zero to use as each of the second
8608 operands would use up an additional register and take an extra instruction,
8609 for no particular gain. */
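/* To illustrate: reducing a four-element vector takes two pairwise
   steps (i = 2, then i = 1 in the loop below), going from four
   partial results to two and then to one; every element of the final
   register holds the same total, and only element 0 is used. */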
8612 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8613 rtx (*reduc) (rtx, rtx, rtx))
8615 enum machine_mode inner = GET_MODE_INNER (mode);
8616 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8619 for (i = parts / 2; i >= 1; i /= 2)
8621 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8622 emit_insn (reduc (dest, tmpsum, tmpsum));
8627 /* If VALS is a vector constant that can be loaded into a register
8628 using VDUP, generate instructions to do so and return an RTX to
8629 assign to the register. Otherwise return NULL_RTX. */
8632 neon_vdup_constant (rtx vals)
8634 enum machine_mode mode = GET_MODE (vals);
8635 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8636 int n_elts = GET_MODE_NUNITS (mode);
8637 bool all_same = true;
8641 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8644 for (i = 0; i < n_elts; ++i)
8646 x = XVECEXP (vals, 0, i);
8647 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8652 /* The elements are not all the same. We could handle repeating
8653 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8654 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8658 /* We can load this constant by using VDUP and a constant in a
8659 single ARM register. This will be cheaper than a vector
8662 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
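/* E.g. (an assumed illustration): for the V4SI constant
   {5, 5, 5, 5}, the copy above puts 5 in a core register and the
   returned VEC_DUPLICATE is emitted as a single vdup.32. */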
8663 return gen_rtx_VEC_DUPLICATE (mode, x);
8666 /* Generate code to load VALS, which is a PARALLEL containing only
8667 constants (for vec_init) or CONST_VECTOR, efficiently into a
8668 register. Returns an RTX to copy into the register, or NULL_RTX
8669 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8672 neon_make_constant (rtx vals)
8674 enum machine_mode mode = GET_MODE (vals);
8676 rtx const_vec = NULL_RTX;
8677 int n_elts = GET_MODE_NUNITS (mode);
8681 if (GET_CODE (vals) == CONST_VECTOR)
8683 else if (GET_CODE (vals) == PARALLEL)
8685 /* A CONST_VECTOR must contain only CONST_INTs and
8686 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8687 Only store valid constants in a CONST_VECTOR. */
8688 for (i = 0; i < n_elts; ++i)
8690 rtx x = XVECEXP (vals, 0, i);
8691 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8694 if (n_const == n_elts)
8695 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8700 if (const_vec != NULL
8701 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8702 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8704 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8705 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8706 pipeline cycle; creating the constant takes one or two ARM
8709 else if (const_vec != NULL_RTX)
8710 /* Load from constant pool. On Cortex-A8 this takes two cycles
8711 (for either double or quad vectors). We cannot take advantage
8712 of single-cycle VLD1 because we need a PC-relative addressing
8716 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8717 We cannot construct an initializer. */
8721 /* Initialize vector TARGET to VALS. */
8724 neon_expand_vector_init (rtx target, rtx vals)
8726 enum machine_mode mode = GET_MODE (target);
8727 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8728 int n_elts = GET_MODE_NUNITS (mode);
8729 int n_var = 0, one_var = -1;
8730 bool all_same = true;
8734 for (i = 0; i < n_elts; ++i)
8736 x = XVECEXP (vals, 0, i);
8737 if (!CONSTANT_P (x))
8738 ++n_var, one_var = i;
8740 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8746 rtx constant = neon_make_constant (vals);
8747 if (constant != NULL_RTX)
8749 emit_move_insn (target, constant);
8754 /* Splat a single non-constant element if we can. */
8755 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8757 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8758 emit_insn (gen_rtx_SET (VOIDmode, target,
8759 gen_rtx_VEC_DUPLICATE (mode, x)));
8763 /* One field is non-constant. Load constant then overwrite varying
8764 field. This is more efficient than using the stack. */
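/* E.g. (illustrative): for {x, 1, 2, 3} we build the constant
   {1, 1, 2, 3} first (the varying lane borrows its neighbour's
   value below) and then insert x into lane 0 with vset_lane. */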
8767 rtx copy = copy_rtx (vals);
8768 rtx index = GEN_INT (one_var);
8770 /* Load constant part of vector, substitute neighboring value for
8771 element that is variable. */
8772 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8773 neon_expand_vector_init (target, copy);
8775 /* Insert variable. */
8776 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8780 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8783 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8786 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8789 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8792 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8795 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8798 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8801 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8804 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8812 /* Construct the vector in memory one field at a time
8813 and load the whole vector. */
8814 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8815 for (i = 0; i < n_elts; i++)
8816 emit_move_insn (adjust_address_nv (mem, inner_mode,
8817 i * GET_MODE_SIZE (inner_mode)),
8818 XVECEXP (vals, 0, i));
8819 emit_move_insn (target, mem);
8822 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8823 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8824 reported source locations are bogus. */
8827 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8832 gcc_assert (GET_CODE (operand) == CONST_INT);
8834 lane = INTVAL (operand);
8836 if (lane < low || lane >= high)
8840 /* Bounds-check lanes. */
8843 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8845 bounds_check (operand, low, high, "lane out of range");
8848 /* Bounds-check constants. */
8851 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8853 bounds_check (operand, low, high, "constant out of range");
8857 neon_element_bits (enum machine_mode mode)
8860 return GET_MODE_BITSIZE (mode);
8862 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8866 /* Predicates for `match_operand' and `match_operator'. */
8868 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8870 cirrus_memory_offset (rtx op)
8872 /* Reject eliminable registers. */
8873 if (! (reload_in_progress || reload_completed)
8874 && ( reg_mentioned_p (frame_pointer_rtx, op)
8875 || reg_mentioned_p (arg_pointer_rtx, op)
8876 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8877 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8878 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8879 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8882 if (GET_CODE (op) == MEM)
8888 /* Match: (mem (reg)). */
8889 if (GET_CODE (ind) == REG)
8895 if (GET_CODE (ind) == PLUS
8896 && GET_CODE (XEXP (ind, 0)) == REG
8897 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8898 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8905 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8906 WB is true if full writeback address modes are allowed and is false
8907 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
8911 arm_coproc_mem_operand (rtx op, bool wb)
8915 /* Reject eliminable registers. */
8916 if (! (reload_in_progress || reload_completed)
8917 && ( reg_mentioned_p (frame_pointer_rtx, op)
8918 || reg_mentioned_p (arg_pointer_rtx, op)
8919 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8920 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8921 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8922 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8925 /* Constants are converted into offsets from labels. */
8926 if (GET_CODE (op) != MEM)
8931 if (reload_completed
8932 && (GET_CODE (ind) == LABEL_REF
8933 || (GET_CODE (ind) == CONST
8934 && GET_CODE (XEXP (ind, 0)) == PLUS
8935 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8936 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8939 /* Match: (mem (reg)). */
8940 if (GET_CODE (ind) == REG)
8941 return arm_address_register_rtx_p (ind, 0);
8943 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
8944 acceptable in any case (subject to verification by
8945 arm_address_register_rtx_p). We need WB to be true to accept
8946 PRE_INC and POST_DEC. */
8947 if (GET_CODE (ind) == POST_INC
8948 || GET_CODE (ind) == PRE_DEC
8950 && (GET_CODE (ind) == PRE_INC
8951 || GET_CODE (ind) == POST_DEC)))
8952 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8955 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8956 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8957 && GET_CODE (XEXP (ind, 1)) == PLUS
8958 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8959 ind = XEXP (ind, 1);
8964 if (GET_CODE (ind) == PLUS
8965 && GET_CODE (XEXP (ind, 0)) == REG
8966 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8967 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8968 && INTVAL (XEXP (ind, 1)) > -1024
8969 && INTVAL (XEXP (ind, 1)) < 1024
8970 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
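/* Editorial note: the immediate test above corresponds to VFP addressing
   mode 5, an 8-bit word offset, so the accepted range is -1020 .. +1020
   in multiples of 4.  For example, (mem (plus (reg rn) (const_int 1020)))
   is accepted, while 1022 (unaligned) and 1024 (out of range) are not.  */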
8976 /* Return TRUE if OP is a memory operand which we can load or store a vector
8977 to/from. TYPE is one of the following values:
8978 0 - Vector load/store (vldr)
8979 1 - Core registers (ldm)
8980 2 - Element/structure loads (vld1)
8983 neon_vector_mem_operand (rtx op, int type)
8987 /* Reject eliminable registers. */
8988 if (! (reload_in_progress || reload_completed)
8989 && ( reg_mentioned_p (frame_pointer_rtx, op)
8990 || reg_mentioned_p (arg_pointer_rtx, op)
8991 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8992 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8993 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8994 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8997 /* Constants are converted into offsets from labels. */
8998 if (GET_CODE (op) != MEM)
9003 if (reload_completed
9004 && (GET_CODE (ind) == LABEL_REF
9005 || (GET_CODE (ind) == CONST
9006 && GET_CODE (XEXP (ind, 0)) == PLUS
9007 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9008 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9011 /* Match: (mem (reg)). */
9012 if (GET_CODE (ind) == REG)
9013 return arm_address_register_rtx_p (ind, 0);
9015 /* Allow post-increment with Neon registers. */
9016 if ((type != 1 && GET_CODE (ind) == POST_INC)
9017 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9018 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9020 /* FIXME: vld1 allows register post-modify. */
9026 && GET_CODE (ind) == PLUS
9027 && GET_CODE (XEXP (ind, 0)) == REG
9028 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9029 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9030 && INTVAL (XEXP (ind, 1)) > -1024
9031 && INTVAL (XEXP (ind, 1)) < 1016
9032 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
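/* Editorial examples of the TYPE distinction, assuming RN is an
   acceptable base register:

	(mem (reg rn))				types 0, 1 and 2
	(mem (post_inc (reg rn)))		types 0 and 2 only
	(mem (plus (reg rn) (const_int 8)))	type 0 (vldr) only  */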
9038 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
9041 neon_struct_mem_operand (rtx op)
9045 /* Reject eliminable registers. */
9046 if (! (reload_in_progress || reload_completed)
9047 && ( reg_mentioned_p (frame_pointer_rtx, op)
9048 || reg_mentioned_p (arg_pointer_rtx, op)
9049 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9050 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9051 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9052 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9055 /* Constants are converted into offsets from labels. */
9056 if (GET_CODE (op) != MEM)
9061 if (reload_completed
9062 && (GET_CODE (ind) == LABEL_REF
9063 || (GET_CODE (ind) == CONST
9064 && GET_CODE (XEXP (ind, 0)) == PLUS
9065 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9066 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9069 /* Match: (mem (reg)). */
9070 if (GET_CODE (ind) == REG)
9071 return arm_address_register_rtx_p (ind, 0);
9076 /* Return true if X is a register that will be eliminated later on. */
9078 arm_eliminable_register (rtx x)
9080 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9081 || REGNO (x) == ARG_POINTER_REGNUM
9082 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9083 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9086 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9087 coprocessor registers. Otherwise return NO_REGS. */
9090 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9094 if (!TARGET_NEON_FP16)
9095 return GENERAL_REGS;
9096 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9098 return GENERAL_REGS;
9101 /* The neon move patterns handle all legitimate vector and struct addresses. */
9105 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9106 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9107 || VALID_NEON_STRUCT_MODE (mode)))
9110 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9113 return GENERAL_REGS;
9116 /* Values which must be returned in the most-significant end of the return register. */
9120 arm_return_in_msb (const_tree valtype)
9122 return (TARGET_AAPCS_BASED
9124 && (AGGREGATE_TYPE_P (valtype)
9125 || TREE_CODE (valtype) == COMPLEX_TYPE));
9128 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9129 Used by the Cirrus Maverick code, which has to work around
9130 a hardware bug triggered by such instructions. */
9132 arm_memory_load_p (rtx insn)
9134 rtx body, lhs, rhs;
9136 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9139 body = PATTERN (insn);
9141 if (GET_CODE (body) != SET)
9144 lhs = XEXP (body, 0);
9145 rhs = XEXP (body, 1);
9147 lhs = REG_OR_SUBREG_RTX (lhs);
9149 /* If the destination is not a general purpose
9150 register we do not have to worry. */
9151 if (GET_CODE (lhs) != REG
9152 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9155 /* As well as loads from memory we also have to react
9156 to loads of invalid constants which will be turned
9157 into loads from the minipool. */
9158 return (GET_CODE (rhs) == MEM
9159 || GET_CODE (rhs) == SYMBOL_REF
9160 || note_invalid_constants (insn, -1, false));
9163 /* Return TRUE if INSN is a Cirrus instruction. */
9165 arm_cirrus_insn_p (rtx insn)
9167 enum attr_cirrus attr;
9169 /* get_attr cannot accept USE or CLOBBER. */
9171 || GET_CODE (insn) != INSN
9172 || GET_CODE (PATTERN (insn)) == USE
9173 || GET_CODE (PATTERN (insn)) == CLOBBER)
9176 attr = get_attr_cirrus (insn);
9178 return attr != CIRRUS_NOT;
9181 /* Cirrus reorg for invalid instruction combinations. */
9183 cirrus_reorg (rtx first)
9185 enum attr_cirrus attr;
9186 rtx body = PATTERN (first);
9190 /* Any branch must be followed by 2 non-Cirrus instructions. */
9191 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9194 t = next_nonnote_insn (first);
9196 if (arm_cirrus_insn_p (t))
9199 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9203 emit_insn_after (gen_nop (), first);
9208 /* (float (blah)) is in parallel with a clobber. */
9209 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9210 body = XVECEXP (body, 0, 0);
9212 if (GET_CODE (body) == SET)
9214 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9216 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9217 be followed by a non-Cirrus insn. */
9218 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9220 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9221 emit_insn_after (gen_nop (), first);
9225 else if (arm_memory_load_p (first))
9227 unsigned int arm_regno;
9229 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9230 ldr/cfmv64hr combination where the Rd field is the same
9231 in both instructions must be split with a non-Cirrus
insn. Example:

ldr r0, blah
nop
cfmvsr mvf0, r0. */
9238 /* Get ARM register number for the ldr insn. */
9239 if (GET_CODE (lhs) == REG)
9240 arm_regno = REGNO (lhs);
9243 gcc_assert (GET_CODE (rhs) == REG);
9244 arm_regno = REGNO (rhs);
9248 first = next_nonnote_insn (first);
9250 if (! arm_cirrus_insn_p (first))
9253 body = PATTERN (first);
9255 /* (float (blah)) is in parallel with a clobber. */
9256 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9257 body = XVECEXP (body, 0, 0);
9259 if (GET_CODE (body) == FLOAT)
9260 body = XEXP (body, 0);
9262 if (get_attr_cirrus (first) == CIRRUS_MOVE
9263 && GET_CODE (XEXP (body, 1)) == REG
9264 && arm_regno == REGNO (XEXP (body, 1)))
9265 emit_insn_after (gen_nop (), first);
9271 /* get_attr cannot accept USE or CLOBBER. */
9273 || GET_CODE (first) != INSN
9274 || GET_CODE (PATTERN (first)) == USE
9275 || GET_CODE (PATTERN (first)) == CLOBBER)
9278 attr = get_attr_cirrus (first);
9280 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9281 must be followed by a non-coprocessor instruction. */
9282 if (attr == CIRRUS_COMPARE)
9286 t = next_nonnote_insn (first);
9288 if (arm_cirrus_insn_p (t))
9291 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9295 emit_insn_after (gen_nop (), first);
9301 /* Return TRUE if X references a SYMBOL_REF. */
9303 symbol_mentioned_p (rtx x)
9308 if (GET_CODE (x) == SYMBOL_REF)
9311 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9312 are constant offsets, not symbols. */
9313 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9316 fmt = GET_RTX_FORMAT (GET_CODE (x));
9318 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9324 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9325 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9328 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9335 /* Return TRUE if X references a LABEL_REF. */
9337 label_mentioned_p (rtx x)
9342 if (GET_CODE (x) == LABEL_REF)
9345 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9346 instruction, but they are constant offsets, not symbols. */
9347 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9350 fmt = GET_RTX_FORMAT (GET_CODE (x));
9351 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9357 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9358 if (label_mentioned_p (XVECEXP (x, i, j)))
9361 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9369 tls_mentioned_p (rtx x)
9371 switch (GET_CODE (x))
9374 return tls_mentioned_p (XEXP (x, 0));
9377 if (XINT (x, 1) == UNSPEC_TLS)
9385 /* Must not copy any rtx that uses a pc-relative address. */
9388 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9390 if (GET_CODE (*x) == UNSPEC
9391 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9397 arm_cannot_copy_insn_p (rtx insn)
9399 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9405 enum rtx_code code = GET_CODE (x);
9422 /* Return 1 if memory locations are adjacent. */
9424 adjacent_mem_locations (rtx a, rtx b)
9426 /* We don't guarantee to preserve the order of these memory refs. */
9427 if (volatile_refs_p (a) || volatile_refs_p (b))
9430 if ((GET_CODE (XEXP (a, 0)) == REG
9431 || (GET_CODE (XEXP (a, 0)) == PLUS
9432 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9433 && (GET_CODE (XEXP (b, 0)) == REG
9434 || (GET_CODE (XEXP (b, 0)) == PLUS
9435 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9437 HOST_WIDE_INT val0 = 0, val1 = 0;
9441 if (GET_CODE (XEXP (a, 0)) == PLUS)
9443 reg0 = XEXP (XEXP (a, 0), 0);
9444 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9449 if (GET_CODE (XEXP (b, 0)) == PLUS)
9451 reg1 = XEXP (XEXP (b, 0), 0);
9452 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9457 /* Don't accept any offset that will require multiple
9458 instructions to handle, since this would cause the
9459 arith_adjacentmem pattern to output an overlong sequence. */
9460 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9463 /* Don't allow an eliminable register: register elimination can make
9464 the offset too large. */
9465 if (arm_eliminable_register (reg0))
9468 val_diff = val1 - val0;
9472 /* If the target has load delay slots, then there's no benefit
9473 to using an ldm instruction unless the offset is zero and
9474 we are optimizing for size. */
9475 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9476 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9477 && (val_diff == 4 || val_diff == -4));
9480 return ((REGNO (reg0) == REGNO (reg1))
9481 && (val_diff == 4 || val_diff == -4));
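/* Editorial example: (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) are adjacent, as are (mem (reg r3))
   and (mem (plus (reg r3) (const_int -4))); the two references must share
   a base register and their offsets must differ by exactly one word in
   either direction.  */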
9487 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9488 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9489 instruction. ADD_OFFSET is nonzero if the base address register needs
9490 to be modified with an add instruction before we can use it. */
9493 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9494 int nops, HOST_WIDE_INT add_offset)
9496 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9497 if the offset isn't small enough. The reason 2 ldrs are faster
9498 is because these ARMs are able to do more than one cache access
9499 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9500 whilst the ARM8 has a double bandwidth cache. This means that
9501 these cores can do both an instruction fetch and a data fetch in
9502 a single cycle, so the trick of calculating the address into a
9503 scratch register (one of the result regs) and then doing a load
9504 multiple actually becomes slower (and no smaller in code size).
9505 That is the transformation
9507 ldr rd1, [rbase + offset]
9508 ldr rd2, [rbase + offset + 4]

to
9512 add rd1, rbase, offset
9513 ldmia rd1, {rd1, rd2}
9515 produces worse code -- '3 cycles + any stalls on rd2' instead of
9516 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9517 access per cycle, the first sequence could never complete in less
9518 than 6 cycles, whereas the ldm sequence would only take 5 and
9519 would make better use of sequential accesses if not hitting the
9522 We cheat here and test 'arm_ld_sched' which we currently know to
9523 only be true for the ARM8, ARM9 and StrongARM. If this ever
9524 changes, then the test below needs to be reworked. */
9525 if (nops == 2 && arm_ld_sched && add_offset != 0)
9528 /* XScale has load-store double instructions, but they have stricter
9529 alignment requirements than load-store multiple, so we cannot
9532 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9533 the pipeline until completion.
9541 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9550 Best case ldr will always win. However, the more ldr instructions
9551 we issue, the less likely we are to be able to schedule them well.
9552 Using ldr instructions also increases code size.
9554 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9555 for counts of 3 or 4 regs. */
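/* Editorial worked example using the XScale figures above: an ldm of two
   registers costs 2 + 2 = 4 cycles and blocks the pipeline, whereas two
   ldrs cost 2-6 non-blocking cycles, so for two registers the ldrs are
   preferred unless optimizing for size; for four registers the ldm costs
   2 + 4 = 6 cycles against 4-12 for four ldrs, and the ldm wins.  */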
9556 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9561 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9562 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9563 an array ORDER which describes the sequence to use when accessing the
9564 offsets that produces an ascending order. In this sequence, each
9565 offset must be larger by exactly 4 than the previous one. ORDER[0]
9566 must have been filled in with the lowest offset by the caller.
9567 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9568 we use to verify that ORDER produces an ascending order of registers.
9569 Return true if it was possible to construct such an order, false if not. */
9573 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9577 for (i = 1; i < nops; i++)
9581 order[i] = order[i - 1];
9582 for (j = 0; j < nops; j++)
9583 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9585 /* We must find exactly one offset that is higher than the
9586 previous one by 4. */
9587 if (order[i] != order[i - 1])
9591 if (order[i] == order[i - 1])
9593 /* The register numbers must be ascending. */
9594 if (unsorted_regs != NULL
9595 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
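/* Editorial example: with unsorted_offsets = { 8, 0, 12, 4 } and
   order[0] = 1 (the index of the lowest offset), the loop above produces
   order = { 1, 3, 0, 2 }, visiting offsets 0, 4, 8, 12; a duplicated
   offset, or any step other than +4, makes the function return false.  */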
9601 /* Used to determine in a peephole whether a sequence of load
9602 instructions can be changed into a load-multiple instruction.
9603 NOPS is the number of separate load instructions we are examining. The
9604 first NOPS entries in OPERANDS are the destination registers, the
9605 next NOPS entries are memory operands. If this function is
9606 successful, *BASE is set to the common base register of the memory
9607 accesses; *LOAD_OFFSET is set to the first memory location's offset
9608 from that base register.
9609 REGS is an array filled in with the destination register numbers.
9610 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9611 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9612 the sequence of registers in REGS matches the loads from ascending memory
9613 locations, and the function verifies that the register numbers are
9614 themselves ascending. If CHECK_REGS is false, the register numbers
9615 are stored in the order they are found in the operands. */
9617 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9618 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9620 int unsorted_regs[MAX_LDM_STM_OPS];
9621 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9622 int order[MAX_LDM_STM_OPS];
9623 rtx base_reg_rtx = NULL;
9627 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9628 easily extended if required. */
9629 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9631 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9633 /* Loop over the operands and check that the memory references are
9634 suitable (i.e. immediate offsets from the same base register). At
9635 the same time, extract the target register, and the memory
9637 for (i = 0; i < nops; i++)
9642 /* Convert a subreg of a mem into the mem itself. */
9643 if (GET_CODE (operands[nops + i]) == SUBREG)
9644 operands[nops + i] = alter_subreg (operands + (nops + i));
9646 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9648 /* Don't reorder volatile memory references; it doesn't seem worth
9649 looking for the case where the order is ok anyway. */
9650 if (MEM_VOLATILE_P (operands[nops + i]))
9653 offset = const0_rtx;
9655 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9656 || (GET_CODE (reg) == SUBREG
9657 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9658 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9659 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9661 || (GET_CODE (reg) == SUBREG
9662 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9663 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9668 base_reg = REGNO (reg);
9670 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9673 else if (base_reg != (int) REGNO (reg))
9674 /* Not addressed from the same base register. */
9677 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9678 ? REGNO (operands[i])
9679 : REGNO (SUBREG_REG (operands[i])));
9681 /* If it isn't an integer register, or if it overwrites the
9682 base register but isn't the last insn in the list, then
9683 we can't do this. */
9684 if (unsorted_regs[i] < 0
9685 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9686 || unsorted_regs[i] > 14
9687 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9690 unsorted_offsets[i] = INTVAL (offset);
9691 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9695 /* Not a suitable memory address. */
9699 /* All the useful information has now been extracted from the
9700 operands into unsorted_regs and unsorted_offsets; additionally,
9701 order[0] has been set to the lowest offset in the list. Sort
9702 the offsets into order, verifying that they are adjacent, and
9703 check that the register numbers are ascending. */
9704 if (!compute_offset_order (nops, unsorted_offsets, order,
9705 check_regs ? unsorted_regs : NULL))
9709 memcpy (saved_order, order, sizeof order);
9715 for (i = 0; i < nops; i++)
9716 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9718 *load_offset = unsorted_offsets[order[0]];
9722 && !peep2_reg_dead_p (nops, base_reg_rtx))
9725 if (unsorted_offsets[order[0]] == 0)
9726 ldm_case = 1; /* ldmia */
9727 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9728 ldm_case = 2; /* ldmib */
9729 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9730 ldm_case = 3; /* ldmda */
9731 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9732 ldm_case = 4; /* ldmdb */
9733 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9734 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9739 if (!multiple_operation_profitable_p (false, nops,
9741 ? unsorted_offsets[order[0]] : 0))
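/* Editorial example: for the sequence

	ldr r4, [r0]
	ldr r5, [r0, #4]
	ldr r6, [r0, #8]

   this function returns 1 (ldmia) with *BASE set to r0's number and
   *LOAD_OFFSET = 0; had the lowest offset been 4, ARM mode would
   return 2 (ldmib).  */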
9747 /* Used to determine in a peephole whether a sequence of store instructions can
9748 be changed into a store-multiple instruction.
9749 NOPS is the number of separate store instructions we are examining.
9750 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9752 The first NOPS entries in OPERANDS are the source registers, the next
9753 NOPS entries are memory operands. If this function is successful, *BASE is
9754 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9755 to the first memory location's offset from that base register. REGS is an
9756 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9757 likewise filled with the corresponding rtx's.
9758 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9759 numbers to an ascending order of stores.
9760 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9761 from ascending memory locations, and the function verifies that the register
9762 numbers are themselves ascending. If CHECK_REGS is false, the register
9763 numbers are stored in the order they are found in the operands. */
9765 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9766 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9767 HOST_WIDE_INT *load_offset, bool check_regs)
9769 int unsorted_regs[MAX_LDM_STM_OPS];
9770 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9771 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9772 int order[MAX_LDM_STM_OPS];
9774 rtx base_reg_rtx = NULL;
9777 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9778 easily extended if required. */
9779 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9781 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9783 /* Loop over the operands and check that the memory references are
9784 suitable (i.e. immediate offsets from the same base register). At
9785 the same time, extract the target register, and the memory
9787 for (i = 0; i < nops; i++)
9792 /* Convert a subreg of a mem into the mem itself. */
9793 if (GET_CODE (operands[nops + i]) == SUBREG)
9794 operands[nops + i] = alter_subreg (operands + (nops + i));
9796 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9798 /* Don't reorder volatile memory references; it doesn't seem worth
9799 looking for the case where the order is ok anyway. */
9800 if (MEM_VOLATILE_P (operands[nops + i]))
9803 offset = const0_rtx;
9805 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9806 || (GET_CODE (reg) == SUBREG
9807 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9808 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9809 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9811 || (GET_CODE (reg) == SUBREG
9812 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9813 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9816 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9817 ? operands[i] : SUBREG_REG (operands[i]));
9818 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9822 base_reg = REGNO (reg);
9824 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9827 else if (base_reg != (int) REGNO (reg))
9828 /* Not addressed from the same base register. */
9831 /* If it isn't an integer register, then we can't do this. */
9832 if (unsorted_regs[i] < 0
9833 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9834 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9835 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9836 || unsorted_regs[i] > 14)
9839 unsorted_offsets[i] = INTVAL (offset);
9840 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9844 /* Not a suitable memory address. */
9848 /* All the useful information has now been extracted from the
9849 operands into unsorted_regs and unsorted_offsets; additionally,
9850 order[0] has been set to the lowest offset in the list. Sort
9851 the offsets into order, verifying that they are adjacent, and
9852 check that the register numbers are ascending. */
9853 if (!compute_offset_order (nops, unsorted_offsets, order,
9854 check_regs ? unsorted_regs : NULL))
9858 memcpy (saved_order, order, sizeof order);
9864 for (i = 0; i < nops; i++)
9866 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9868 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9871 *load_offset = unsorted_offsets[order[0]];
9875 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9878 if (unsorted_offsets[order[0]] == 0)
9879 stm_case = 1; /* stmia */
9880 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9881 stm_case = 2; /* stmib */
9882 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9883 stm_case = 3; /* stmda */
9884 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9885 stm_case = 4; /* stmdb */
9889 if (!multiple_operation_profitable_p (false, nops, 0))
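/* Editorial note on the extra checks above relative to the load case:
   in Thumb-2 a store list may not include the base register or SP, so

	str r1, [r0]
	str r0, [r0, #4]

   is rejected for Thumb-2 even though the offsets are adjacent.  */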
9895 /* Routines for use in generating RTL. */
9897 /* Generate a load-multiple instruction. COUNT is the number of loads in
9898 the instruction; REGS and MEMS are arrays containing the operands.
9899 BASEREG is the base register to be used in addressing the memory operands.
9900 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9904 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9905 HOST_WIDE_INT wback_offset)
9910 if (!multiple_operation_profitable_p (false, count, 0))
9916 for (i = 0; i < count; i++)
9917 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9919 if (wback_offset != 0)
9920 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9928 result = gen_rtx_PARALLEL (VOIDmode,
9929 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9930 if (wback_offset != 0)
9932 XVECEXP (result, 0, 0)
9933 = gen_rtx_SET (VOIDmode, basereg,
9934 plus_constant (basereg, wback_offset));
9939 for (j = 0; i < count; i++, j++)
9940 XVECEXP (result, 0, i)
9941 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9946 /* Generate a store-multiple instruction. COUNT is the number of stores in
9947 the instruction; REGS and MEMS are arrays containing the operands.
9948 BASEREG is the base register to be used in addressing the memory operands.
9949 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9953 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9954 HOST_WIDE_INT wback_offset)
9959 if (GET_CODE (basereg) == PLUS)
9960 basereg = XEXP (basereg, 0);
9962 if (!multiple_operation_profitable_p (false, count, 0))
9968 for (i = 0; i < count; i++)
9969 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9971 if (wback_offset != 0)
9972 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9980 result = gen_rtx_PARALLEL (VOIDmode,
9981 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9982 if (wback_offset != 0)
9984 XVECEXP (result, 0, 0)
9985 = gen_rtx_SET (VOIDmode, basereg,
9986 plus_constant (basereg, wback_offset));
9991 for (j = 0; i < count; i++, j++)
9992 XVECEXP (result, 0, i)
9993 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9998 /* Generate either a load-multiple or a store-multiple instruction. This
9999 function can be used in situations where we can start with a single MEM
10000 rtx and adjust its address upwards.
10001 COUNT is the number of operations in the instruction, not counting a
10002 possible update of the base register. REGS is an array containing the register numbers to be used.
10004 BASEREG is the base register to be used in addressing the memory operands,
10005 which are constructed from BASEMEM.
10006 WRITE_BACK specifies whether the generated instruction should include an
10007 update of the base register.
10008 OFFSETP is used to pass an offset to and from this function; this offset
10009 is not used when constructing the address (instead BASEMEM should have an
10010 appropriate offset in its address), it is used only for setting
10011 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10014 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10015 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10017 rtx mems[MAX_LDM_STM_OPS];
10018 HOST_WIDE_INT offset = *offsetp;
10021 gcc_assert (count <= MAX_LDM_STM_OPS);
10023 if (GET_CODE (basereg) == PLUS)
10024 basereg = XEXP (basereg, 0);
10026 for (i = 0; i < count; i++)
10028 rtx addr = plus_constant (basereg, i * 4);
10029 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10037 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10038 write_back ? 4 * count : 0);
10040 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10041 write_back ? 4 * count : 0);
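/* Editorial usage sketch: to load r4-r7 from the address in BASEREG and
   step the base past the data, a caller could write

	int regs[4] = { 4, 5, 6, 7 };
	emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
					  basemem, &offset));

   after which OFFSET has been advanced by 16.  */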
10045 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10046 rtx basemem, HOST_WIDE_INT *offsetp)
10048 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10053 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10054 rtx basemem, HOST_WIDE_INT *offsetp)
10056 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10060 /* Called from a peephole2 expander to turn a sequence of loads into an
10061 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10062 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10063 is true if we can reorder the registers because they are used commutatively subsequently.
10065 Returns true iff we could generate a new instruction. */
10068 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10070 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10071 rtx mems[MAX_LDM_STM_OPS];
10072 int i, j, base_reg;
10074 HOST_WIDE_INT offset;
10075 int write_back = FALSE;
10079 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10080 &base_reg, &offset, !sort_regs);
10086 for (i = 0; i < nops - 1; i++)
10087 for (j = i + 1; j < nops; j++)
10088 if (regs[i] > regs[j])
10094 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10098 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10099 gcc_assert (ldm_case == 1 || ldm_case == 5);
10105 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10106 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10108 if (!TARGET_THUMB1)
10110 base_reg = regs[0];
10111 base_reg_rtx = newbase;
10115 for (i = 0; i < nops; i++)
10117 addr = plus_constant (base_reg_rtx, offset + i * 4);
10118 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10121 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10122 write_back ? offset + i * 4 : 0));
10126 /* Called from a peephole2 expander to turn a sequence of stores into an
10127 STM instruction. OPERANDS are the operands found by the peephole matcher;
10128 NOPS indicates how many separate stores we are trying to combine.
10129 Returns true iff we could generate a new instruction. */
10132 gen_stm_seq (rtx *operands, int nops)
10135 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10136 rtx mems[MAX_LDM_STM_OPS];
10139 HOST_WIDE_INT offset;
10140 int write_back = FALSE;
10143 bool base_reg_dies;
10145 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10146 mem_order, &base_reg, &offset, true);
10151 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10153 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10156 gcc_assert (base_reg_dies);
10162 gcc_assert (base_reg_dies);
10163 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10167 addr = plus_constant (base_reg_rtx, offset);
10169 for (i = 0; i < nops; i++)
10171 addr = plus_constant (base_reg_rtx, offset + i * 4);
10172 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10175 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10176 write_back ? offset + i * 4 : 0));
10180 /* Called from a peephole2 expander to turn a sequence of stores that are
10181 preceded by constant loads into an STM instruction. OPERANDS are the
10182 operands found by the peephole matcher; NOPS indicates how many
10183 separate stores we are trying to combine; there are 2 * NOPS
10184 instructions in the peephole.
10185 Returns true iff we could generate a new instruction. */
10188 gen_const_stm_seq (rtx *operands, int nops)
10190 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10191 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10192 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10193 rtx mems[MAX_LDM_STM_OPS];
10196 HOST_WIDE_INT offset;
10197 int write_back = FALSE;
10200 bool base_reg_dies;
10202 HARD_REG_SET allocated;
10204 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10205 mem_order, &base_reg, &offset, false);
10210 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10212 /* If the same register is used more than once, try to find a free register. */
10214 CLEAR_HARD_REG_SET (allocated);
10215 for (i = 0; i < nops; i++)
10217 for (j = i + 1; j < nops; j++)
10218 if (regs[i] == regs[j])
10220 rtx t = peep2_find_free_register (0, nops * 2,
10221 TARGET_THUMB1 ? "l" : "r",
10222 SImode, &allocated);
10226 regs[i] = REGNO (t);
10230 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10233 for (i = 0; i < nops; i++)
10234 if (regs[i] < regs[reg_order[0]])
10237 for (i = 1; i < nops; i++)
10239 int this_order = reg_order[i - 1];
10240 for (j = 0; j < nops; j++)
10241 if (regs[j] > regs[reg_order[i - 1]]
10242 && (this_order == reg_order[i - 1]
10243 || regs[j] < regs[this_order]))
10245 reg_order[i] = this_order;
10248 /* Ensure that registers that must be live after the instruction end
10249 up with the correct value. */
10250 for (i = 0; i < nops; i++)
10252 int this_order = reg_order[i];
10253 if ((this_order != mem_order[i]
10254 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10255 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10259 /* Load the constants. */
10260 for (i = 0; i < nops; i++)
10262 rtx op = operands[2 * nops + mem_order[i]];
10263 sorted_regs[i] = regs[reg_order[i]];
10264 emit_move_insn (reg_rtxs[reg_order[i]], op);
10267 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10269 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10272 gcc_assert (base_reg_dies);
10278 gcc_assert (base_reg_dies);
10279 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10283 addr = plus_constant (base_reg_rtx, offset);
10285 for (i = 0; i < nops; i++)
10287 addr = plus_constant (base_reg_rtx, offset + i * 4);
10288 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10291 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10292 write_back ? offset + i * 4 : 0));
10297 arm_gen_movmemqi (rtx *operands)
10299 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10300 HOST_WIDE_INT srcoffset, dstoffset;
10302 rtx src, dst, srcbase, dstbase;
10303 rtx part_bytes_reg = NULL;
10306 if (GET_CODE (operands[2]) != CONST_INT
10307 || GET_CODE (operands[3]) != CONST_INT
10308 || INTVAL (operands[2]) > 64
10309 || INTVAL (operands[3]) & 3)
10312 dstbase = operands[0];
10313 srcbase = operands[1];
10315 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10316 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10318 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10319 out_words_to_go = INTVAL (operands[2]) / 4;
10320 last_bytes = INTVAL (operands[2]) & 3;
10321 dstoffset = srcoffset = 0;
10323 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10324 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10326 for (i = 0; in_words_to_go >= 2; i += 4)
10328 if (in_words_to_go > 4)
10329 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10330 TRUE, srcbase, &srcoffset));
10332 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10333 src, FALSE, srcbase,
10336 if (out_words_to_go)
10338 if (out_words_to_go > 4)
10339 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10340 TRUE, dstbase, &dstoffset));
10341 else if (out_words_to_go != 1)
10342 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10343 out_words_to_go, dst,
10346 dstbase, &dstoffset));
10349 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10350 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10351 if (last_bytes != 0)
10353 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10359 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10360 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10363 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10364 if (out_words_to_go)
10368 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10369 sreg = copy_to_reg (mem);
10371 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10372 emit_move_insn (mem, sreg);
10375 gcc_assert (!in_words_to_go); /* Sanity check */
10378 if (in_words_to_go)
10380 gcc_assert (in_words_to_go > 0);
10382 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10383 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10386 gcc_assert (!last_bytes || part_bytes_reg);
10388 if (BYTES_BIG_ENDIAN && last_bytes)
10390 rtx tmp = gen_reg_rtx (SImode);
10392 /* The bytes we want are in the top end of the word. */
10393 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10394 GEN_INT (8 * (4 - last_bytes))));
10395 part_bytes_reg = tmp;
10399 mem = adjust_automodify_address (dstbase, QImode,
10400 plus_constant (dst, last_bytes - 1),
10401 dstoffset + last_bytes - 1);
10402 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10406 tmp = gen_reg_rtx (SImode);
10407 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10408 part_bytes_reg = tmp;
10415 if (last_bytes > 1)
10417 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10418 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10422 rtx tmp = gen_reg_rtx (SImode);
10423 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10424 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10425 part_bytes_reg = tmp;
10432 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10433 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
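/* Editorial walk-through: for a 14-byte copy, in_words_to_go == 4,
   out_words_to_go == 3 and last_bytes == 2, so the loop above loads four
   words, stores three of them with a store-multiple, and the tail code
   then stores the remaining two bytes as a halfword (with extra shifting
   on big-endian targets).  */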
10440 /* Select a dominance comparison mode if possible for a test of the general
10441 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10442 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10443 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10444 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10445 In all cases OP will be either EQ or NE, but we don't need to know which
10446 here. If we are unable to support a dominance comparison we return
10447 CC mode. This will then fail to match for the RTL expressions that
10448 generate this call. */
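/* Editorial example: combine may present (and:SI (eq ...) (eq ...))
   compared against zero; that reaches this function with
   COND_OR == DOM_CC_X_AND_Y and both conditions EQ, and CC_DEQmode is
   returned.  A pair such as EQ and LT, where neither condition dominates
   the other, yields CCmode and the pattern fails to match.  */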
10450 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10452 enum rtx_code cond1, cond2;
10455 /* Currently we will probably get the wrong result if the individual
10456 comparisons are not simple. This also ensures that it is safe to
10457 reverse a comparison if necessary. */
10458 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10460 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10464 /* The if_then_else variant of this tests the second condition if the
10465 first passes, but is true if the first fails. Reverse the first
10466 condition to get a true "inclusive-or" expression. */
10467 if (cond_or == DOM_CC_NX_OR_Y)
10468 cond1 = reverse_condition (cond1);
10470 /* If the comparisons are not equal, and one doesn't dominate the other,
10471 then we can't do this. */
10473 && !comparison_dominates_p (cond1, cond2)
10474 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10479 enum rtx_code temp = cond1;
10487 if (cond_or == DOM_CC_X_AND_Y)
10492 case EQ: return CC_DEQmode;
10493 case LE: return CC_DLEmode;
10494 case LEU: return CC_DLEUmode;
10495 case GE: return CC_DGEmode;
10496 case GEU: return CC_DGEUmode;
10497 default: gcc_unreachable ();
10501 if (cond_or == DOM_CC_X_AND_Y)
10513 gcc_unreachable ();
10517 if (cond_or == DOM_CC_X_AND_Y)
10529 gcc_unreachable ();
10533 if (cond_or == DOM_CC_X_AND_Y)
10534 return CC_DLTUmode;
10539 return CC_DLTUmode;
10541 return CC_DLEUmode;
10545 gcc_unreachable ();
10549 if (cond_or == DOM_CC_X_AND_Y)
10550 return CC_DGTUmode;
10555 return CC_DGTUmode;
10557 return CC_DGEUmode;
10561 gcc_unreachable ();
10564 /* The remaining cases only occur when both comparisons are the same. */
10567 gcc_assert (cond1 == cond2);
10571 gcc_assert (cond1 == cond2);
10575 gcc_assert (cond1 == cond2);
10579 gcc_assert (cond1 == cond2);
10580 return CC_DLEUmode;
10583 gcc_assert (cond1 == cond2);
10584 return CC_DGEUmode;
10587 gcc_unreachable ();
10592 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10594 /* All floating point compares return CCFP if it is an equality
10595 comparison, and CCFPE otherwise. */
10596 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10616 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10621 gcc_unreachable ();
10625 /* A compare with a shifted operand. Because of canonicalization, the
10626 comparison will have to be swapped when we emit the assembler. */
10627 if (GET_MODE (y) == SImode
10628 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10629 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10630 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10631 || GET_CODE (x) == ROTATERT))
10634 /* This operation is performed swapped, but since we only rely on the Z
10635 flag we don't need an additional mode. */
10636 if (GET_MODE (y) == SImode
10637 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10638 && GET_CODE (x) == NEG
10639 && (op == EQ || op == NE))
10642 /* This is a special case that is used by combine to allow a
10643 comparison of a shifted byte load to be split into a zero-extend
10644 followed by a comparison of the shifted integer (only valid for
10645 equalities and unsigned inequalities). */
10646 if (GET_MODE (x) == SImode
10647 && GET_CODE (x) == ASHIFT
10648 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10649 && GET_CODE (XEXP (x, 0)) == SUBREG
10650 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10651 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10652 && (op == EQ || op == NE
10653 || op == GEU || op == GTU || op == LTU || op == LEU)
10654 && GET_CODE (y) == CONST_INT)
10657 /* A construct for a conditional compare, if the false arm contains
10658 0, then both conditions must be true, otherwise either condition
10659 must be true. Not all conditions are possible, so CCmode is
10660 returned if it can't be done. */
10661 if (GET_CODE (x) == IF_THEN_ELSE
10662 && (XEXP (x, 2) == const0_rtx
10663 || XEXP (x, 2) == const1_rtx)
10664 && COMPARISON_P (XEXP (x, 0))
10665 && COMPARISON_P (XEXP (x, 1)))
10666 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10667 INTVAL (XEXP (x, 2)));
10669 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10670 if (GET_CODE (x) == AND
10671 && (op == EQ || op == NE)
10672 && COMPARISON_P (XEXP (x, 0))
10673 && COMPARISON_P (XEXP (x, 1)))
10674 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10677 if (GET_CODE (x) == IOR
10678 && (op == EQ || op == NE)
10679 && COMPARISON_P (XEXP (x, 0))
10680 && COMPARISON_P (XEXP (x, 1)))
10681 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10684 /* An operation (on Thumb) where we want to test for a single bit.
10685 This is done by shifting that bit up into the top bit of a
10686 scratch register; we can then branch on the sign bit. */
10688 && GET_MODE (x) == SImode
10689 && (op == EQ || op == NE)
10690 && GET_CODE (x) == ZERO_EXTRACT
10691 && XEXP (x, 1) == const1_rtx)
10694 /* An operation that sets the condition codes as a side-effect, the
10695 V flag is not set correctly, so we can only use comparisons where
10696 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.)
10698 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10699 if (GET_MODE (x) == SImode
10701 && (op == EQ || op == NE || op == LT || op == GE)
10702 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10703 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10704 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10705 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10706 || GET_CODE (x) == LSHIFTRT
10707 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10708 || GET_CODE (x) == ROTATERT
10709 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10710 return CC_NOOVmode;
10712 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10715 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10716 && GET_CODE (x) == PLUS
10717 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10720 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10722 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
10724 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10731 /* A DImode comparison against zero can be implemented by
10732 or'ing the two halves together. */
10733 if (y == const0_rtx)
10736 /* We can do an equality test in three Thumb instructions. */
10746 /* DImode unsigned comparisons can be implemented by cmp +
10747 cmpeq without a scratch register. Not worth doing in Thumb-2.
10758 /* DImode signed and unsigned comparisons can be implemented
10759 by cmp + sbcs with a scratch register, but that does not
10760 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10761 gcc_assert (op != EQ && op != NE);
10765 gcc_unreachable ();
10772 /* X and Y are two things to compare using CODE. Emit the compare insn and
10773 return the rtx for register 0 in the proper mode. FP means this is a
10774 floating point compare: I don't think that it is needed on ARM.
10776 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10778 enum machine_mode mode;
10780 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10782 /* We might have X as a constant, Y as a register because of the predicates
10783 used for cmpdi. If so, force X to a register here. */
10784 if (dimode_comparison && !REG_P (x))
10785 x = force_reg (DImode, x);
10787 mode = SELECT_CC_MODE (code, x, y);
10788 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10790 if (dimode_comparison
10791 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10792 && mode != CC_CZmode)
10796 /* To compare two non-zero values for equality, XOR them and
10797 then compare against zero. Not used for ARM mode; there
10798 CC_CZmode is cheaper. */
10799 if (mode == CC_Zmode && y != const0_rtx)
10801 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10804 /* A scratch register is required. */
10805 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10806 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10810 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10815 /* Generate a sequence of insns that will generate the correct return
10816 address mask depending on the physical architecture that the program is run on. */
10819 arm_gen_return_addr_mask (void)
10821 rtx reg = gen_reg_rtx (Pmode);
10823 emit_insn (gen_return_addr_mask (reg));
10828 arm_reload_in_hi (rtx *operands)
10830 rtx ref = operands[1];
10832 HOST_WIDE_INT offset = 0;
10834 if (GET_CODE (ref) == SUBREG)
10836 offset = SUBREG_BYTE (ref);
10837 ref = SUBREG_REG (ref);
10840 if (GET_CODE (ref) == REG)
10842 /* We have a pseudo which has been spilt onto the stack; there
10843 are two cases here: the first where there is a simple
10844 stack-slot replacement and a second where the stack-slot is
10845 out of range, or is used as a subreg. */
10846 if (reg_equiv_mem (REGNO (ref)))
10848 ref = reg_equiv_mem (REGNO (ref));
10849 base = find_replacement (&XEXP (ref, 0));
10852 /* The slot is out of range, or was dressed up in a SUBREG. */
10853 base = reg_equiv_address (REGNO (ref));
10856 base = find_replacement (&XEXP (ref, 0));
10858 /* Handle the case where the address is too complex to be offset by 1. */
10859 if (GET_CODE (base) == MINUS
10860 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10862 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10864 emit_set_insn (base_plus, base);
10867 else if (GET_CODE (base) == PLUS)
10869 /* The addend must be CONST_INT, or we would have dealt with it above. */
10870 HOST_WIDE_INT hi, lo;
10872 offset += INTVAL (XEXP (base, 1));
10873 base = XEXP (base, 0);
10875 /* Rework the address into a legal sequence of insns. */
10876 /* Valid range for lo is -4095 -> 4095 */
10879 : -((-offset) & 0xfff));
10881 /* Corner case, if lo is the max offset then we would be out of range
10882 once we have added the additional 1 below, so bump the msb into the
10883 pre-loading insn(s). */
10887 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10888 ^ (HOST_WIDE_INT) 0x80000000)
10889 - (HOST_WIDE_INT) 0x80000000);
10891 gcc_assert (hi + lo == offset);
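/* Editorial worked example: for offset == 4100, lo == 4 and hi == 4096;
   the add below moves the out-of-range part into the base register and
   the byte loads then use the small remainder.  For the corner case
   offset == 4095, lo is trimmed to 2047 (so lo + 1 stays in range),
   giving hi == 2048.  */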
10895 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10897 /* Get the base address; addsi3 knows how to handle constants
10898 that require more than one insn. */
10899 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10905 /* Operands[2] may overlap operands[0] (though it won't overlap
10906 operands[1]), that's why we asked for a DImode reg -- so we can
10907 use the bit that does not overlap. */
10908 if (REGNO (operands[2]) == REGNO (operands[0]))
10909 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10911 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10913 emit_insn (gen_zero_extendqisi2 (scratch,
10914 gen_rtx_MEM (QImode,
10915 plus_constant (base,
10917 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10918 gen_rtx_MEM (QImode,
10919 plus_constant (base,
10921 if (!BYTES_BIG_ENDIAN)
10922 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10923 gen_rtx_IOR (SImode,
10926 gen_rtx_SUBREG (SImode, operands[0], 0),
10930 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10931 gen_rtx_IOR (SImode,
10932 gen_rtx_ASHIFT (SImode, scratch,
10934 gen_rtx_SUBREG (SImode, operands[0], 0)));
10937 /* Handle storing a half-word to memory during reload by synthesizing as two
10938 byte stores. Take care not to clobber the input values until after we
10939 have moved them somewhere safe. This code assumes that if the DImode
10940 scratch in operands[2] overlaps either the input value or output address
10941 in some way, then that value must die in this insn (we absolutely need
10942 two scratch registers for some corner cases). */
10944 arm_reload_out_hi (rtx *operands)
10946 rtx ref = operands[0];
10947 rtx outval = operands[1];
10949 HOST_WIDE_INT offset = 0;
10951 if (GET_CODE (ref) == SUBREG)
10953 offset = SUBREG_BYTE (ref);
10954 ref = SUBREG_REG (ref);
10957 if (GET_CODE (ref) == REG)
10959 /* We have a pseudo which has been spilt onto the stack; there
10960 are two cases here: the first where there is a simple
10961 stack-slot replacement and a second where the stack-slot is
10962 out of range, or is used as a subreg. */
10963 if (reg_equiv_mem (REGNO (ref)))
10965 ref = reg_equiv_mem (REGNO (ref));
10966 base = find_replacement (&XEXP (ref, 0));
10969 /* The slot is out of range, or was dressed up in a SUBREG. */
10970 base = reg_equiv_address (REGNO (ref));
10973 base = find_replacement (&XEXP (ref, 0));
10975 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10977 /* Handle the case where the address is too complex to be offset by 1. */
10978 if (GET_CODE (base) == MINUS
10979 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10981 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10983 /* Be careful not to destroy OUTVAL. */
10984 if (reg_overlap_mentioned_p (base_plus, outval))
10986 /* Updating base_plus might destroy outval, see if we can
10987 swap the scratch and base_plus. */
10988 if (!reg_overlap_mentioned_p (scratch, outval))
10991 scratch = base_plus;
10996 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10998 /* Be conservative and copy OUTVAL into the scratch now,
10999 this should only be necessary if outval is a subreg
11000 of something larger than a word. */
11001 /* XXX Might this clobber base? I can't see how it can,
11002 since scratch is known to overlap with OUTVAL, and
11003 must be wider than a word. */
11004 emit_insn (gen_movhi (scratch_hi, outval));
11005 outval = scratch_hi;
11009 emit_set_insn (base_plus, base);
11012 else if (GET_CODE (base) == PLUS)
11014 /* The addend must be CONST_INT, or we would have dealt with it above. */
11015 HOST_WIDE_INT hi, lo;
11017 offset += INTVAL (XEXP (base, 1));
11018 base = XEXP (base, 0);
11020 /* Rework the address into a legal sequence of insns. */
11021 /* Valid range for lo is -4095 -> 4095 */
11024 : -((-offset) & 0xfff));
11026 /* Corner case, if lo is the max offset then we would be out of range
11027 once we have added the additional 1 below, so bump the msb into the
11028 pre-loading insn(s). */
11032 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11033 ^ (HOST_WIDE_INT) 0x80000000)
11034 - (HOST_WIDE_INT) 0x80000000);
11036 gcc_assert (hi + lo == offset);
11040 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11042 /* Be careful not to destroy OUTVAL. */
11043 if (reg_overlap_mentioned_p (base_plus, outval))
11045 /* Updating base_plus might destroy outval, see if we
11046 can swap the scratch and base_plus. */
11047 if (!reg_overlap_mentioned_p (scratch, outval))
11050 scratch = base_plus;
11055 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11057 /* Be conservative and copy outval into scratch now,
11058 this should only be necessary if outval is a
11059 subreg of something larger than a word. */
11060 /* XXX Might this clobber base? I can't see how it
11061 can, since scratch is known to overlap with
11063 emit_insn (gen_movhi (scratch_hi, outval));
11064 outval = scratch_hi;
11068 /* Get the base address; addsi3 knows how to handle constants
11069 that require more than one insn. */
11070 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11076 if (BYTES_BIG_ENDIAN)
11078 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11079 plus_constant (base, offset + 1)),
11080 gen_lowpart (QImode, outval)));
11081 emit_insn (gen_lshrsi3 (scratch,
11082 gen_rtx_SUBREG (SImode, outval, 0),
11084 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11085 gen_lowpart (QImode, scratch)));
11089 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11090 gen_lowpart (QImode, outval)));
11091 emit_insn (gen_lshrsi3 (scratch,
11092 gen_rtx_SUBREG (SImode, outval, 0),
11094 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11095 plus_constant (base, offset + 1)),
11096 gen_lowpart (QImode, scratch)));
11100 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11101 (padded to the size of a word) should be passed in a register. */
11104 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11106 if (TARGET_AAPCS_BASED)
11107 return must_pass_in_stack_var_size (mode, type);
11109 return must_pass_in_stack_var_size_or_pad (mode, type);
11113 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11114 Return true if an argument passed on the stack should be padded upwards,
11115 i.e. if the least-significant byte has useful data.
11116 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11117 aggregate types are placed in the lowest memory address. */
11120 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11122 if (!TARGET_AAPCS_BASED)
11123 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11125 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11126 return false;
11128 return true;
11132 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11133 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11134 byte of the register has useful data, and return the opposite if the
11135 most significant byte does.
11136 For AAPCS, small aggregates and small complex types are always padded
11137 upwards.  */
11140 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11141 tree type, int first ATTRIBUTE_UNUSED)
11143 if (TARGET_AAPCS_BASED
11144 && BYTES_BIG_ENDIAN
11145 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11146 && int_size_in_bytes (type) <= 4)
11149 /* Otherwise, use default padding. */
11150 return !BYTES_BIG_ENDIAN;
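/* Illustrative note (not from the original source): on a little-endian
   target this returns true, i.e. the least significant byte of the
   register holds useful data; on a big-endian non-AAPCS target it
   returns false and the value sits at the most significant end.  The
   AAPCS big-endian case above returns true for e.g. a 3-byte struct,
   so, per this function's contract, the struct's bytes occupy the
   least significant end of the register.  */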
11154 /* Print a symbolic form of X to the debug file, F. */
11156 arm_print_value (FILE *f, rtx x)
11158 switch (GET_CODE (x))
11161 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11165 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11173 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11175 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11176 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11177 fputc (',', f);
11184 fprintf (f, "\"%s\"", XSTR (x, 0));
11188 fprintf (f, "`%s'", XSTR (x, 0));
11192 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11196 arm_print_value (f, XEXP (x, 0));
11200 arm_print_value (f, XEXP (x, 0));
11202 arm_print_value (f, XEXP (x, 1));
11210 fprintf (f, "????");
11215 /* Routines for manipulation of the constant pool. */
11217 /* Arm instructions cannot load a large constant directly into a
11218 register; they have to come from a pc relative load. The constant
11219 must therefore be placed in the addressable range of the pc
11220 relative load. Depending on the precise pc relative load
11221 instruction the range is somewhere between 256 bytes and 4k. This
11222 means that we often have to dump a constant inside a function, and
11223 generate code to branch around it.
11225 It is important to minimize this, since the branches will slow
11226 things down and make the code larger.
11228 Normally we can hide the table after an existing unconditional
11229 branch so that there is no interruption of the flow, but in the
11230 worst case the code looks like this:
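(the original worst-case example was elided here; the following is a
representative sketch, not the exact original text)

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...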
11248 We fix this by performing a scan after scheduling, which notices
11249 which instructions need to have their operands fetched from the
11250 constant table and builds the table.
11252 The algorithm starts by building a table of all the constants that
11253 need fixing up and all the natural barriers in the function (places
11254 where a constant table can be dropped without breaking the flow).
11255 For each fixup we note how far the pc-relative replacement will be
11256 able to reach and the offset of the instruction into the function.
11258 Having built the table we then group the fixes together to form
11259 tables that are as large as possible (subject to addressing
11260 constraints) and emit each table of constants after the last
11261 barrier that is within range of all the instructions in the group.
11262 If a group does not contain a barrier, then we forcibly create one
11263 by inserting a jump instruction into the flow. Once the table has
11264 been inserted, the insns are then modified to reference the
11265 relevant entry in the pool.
11267 Possible enhancements to the algorithm (not implemented) are:
11269 1) For some processors and object formats, there may be benefit in
11270 aligning the pools to the start of cache lines; this alignment
11271 would need to be taken into account when calculating addressability
11272 of a pool.  */
11274 /* These typedefs are located at the start of this file, so that
11275 they can be used in the prototypes there. This comment is to
11276 remind readers of that fact so that the following structures
11277 can be understood more easily.
11279 typedef struct minipool_node Mnode;
11280 typedef struct minipool_fixup Mfix; */
11282 struct minipool_node
11284 /* Doubly linked chain of entries. */
11287 /* The maximum offset into the code that this entry can be placed. While
11288 pushing fixes for forward references, all entries are sorted in order
11289 of increasing max_address. */
11290 HOST_WIDE_INT max_address;
11291 /* Similarly for an entry inserted for a backwards ref. */
11292 HOST_WIDE_INT min_address;
11293 /* The number of fixes referencing this entry. This can become zero
11294 if we "unpush" an entry. In this case we ignore the entry when we
11295 come to emit the code. */
11297 /* The offset from the start of the minipool. */
11298 HOST_WIDE_INT offset;
11299 /* The value in table. */
11301 /* The mode of value. */
11302 enum machine_mode mode;
11303 /* The size of the value. With iWMMXt enabled,
11304 sizes > 4 also imply an 8-byte alignment.  */
11308 struct minipool_fixup
11312 HOST_WIDE_INT address;
11314 enum machine_mode mode;
11318 HOST_WIDE_INT forwards;
11319 HOST_WIDE_INT backwards;
11322 /* Fixes less than a word need padding out to a word boundary. */
11323 #define MINIPOOL_FIX_SIZE(mode) \
11324 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
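/* For example (illustrative): MINIPOOL_FIX_SIZE (HImode) is 4, since
   a 2-byte constant is padded out to a full word, while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */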
11326 static Mnode * minipool_vector_head;
11327 static Mnode * minipool_vector_tail;
11328 static rtx minipool_vector_label;
11329 static int minipool_pad;
11331 /* The linked list of all minipool fixes required for this function. */
11332 Mfix * minipool_fix_head;
11333 Mfix * minipool_fix_tail;
11334 /* The fix entry for the current minipool, once it has been placed. */
11335 Mfix * minipool_barrier;
11337 /* Determines if INSN is the start of a jump table. Returns the end
11338 of the TABLE or NULL_RTX. */
11340 is_jump_table (rtx insn)
11344 if (GET_CODE (insn) == JUMP_INSN
11345 && JUMP_LABEL (insn) != NULL
11346 && ((table = next_real_insn (JUMP_LABEL (insn)))
11347 == next_real_insn (insn))
11349 && GET_CODE (table) == JUMP_INSN
11350 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11351 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11357 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11358 #define JUMP_TABLES_IN_TEXT_SECTION 0
11359 #endif
11361 static HOST_WIDE_INT
11362 get_jump_table_size (rtx insn)
11364 /* ADDR_VECs only take room if read-only data goes into the text
11365 section.  */
11366 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11368 rtx body = PATTERN (insn);
11369 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11370 HOST_WIDE_INT size;
11371 HOST_WIDE_INT modesize;
11373 modesize = GET_MODE_SIZE (GET_MODE (body));
11374 size = modesize * XVECLEN (body, elt);
11378 /* Round up size of TBB table to a halfword boundary. */
11379 size = (size + 1) & ~(HOST_WIDE_INT)1;
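/* E.g. (illustrative) a TBB table with 5 byte-sized entries occupies
   5 bytes and is rounded up to 6 so that the insn following the
   table stays halfword aligned.  */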
11382 /* No padding necessary for TBH. */
11385 /* Add two bytes for alignment on Thumb. */
11390 gcc_unreachable ();
11398 /* Move a minipool fix MP from its current location to before MAX_MP.
11399 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11400 constraints may need updating. */
11402 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11403 HOST_WIDE_INT max_address)
11405 /* The code below assumes these are different. */
11406 gcc_assert (mp != max_mp);
11408 if (max_mp == NULL)
11410 if (max_address < mp->max_address)
11411 mp->max_address = max_address;
11415 if (max_address > max_mp->max_address - mp->fix_size)
11416 mp->max_address = max_mp->max_address - mp->fix_size;
11418 mp->max_address = max_address;
11420 /* Unlink MP from its current position. Since max_mp is non-null,
11421 mp->prev must be non-null. */
11422 mp->prev->next = mp->next;
11423 if (mp->next != NULL)
11424 mp->next->prev = mp->prev;
11426 minipool_vector_tail = mp->prev;
11428 /* Re-insert it before MAX_MP. */
11430 mp->prev = max_mp->prev;
11433 if (mp->prev != NULL)
11434 mp->prev->next = mp;
11436 minipool_vector_head = mp;
11439 /* Save the new entry. */
11442 /* Scan over the preceding entries and adjust their addresses as
11443 required.  */
11444 while (mp->prev != NULL
11445 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11447 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11454 /* Add a constant to the minipool for a forward reference. Returns the
11455 node added or NULL if the constant will not fit in this pool. */
11457 add_minipool_forward_ref (Mfix *fix)
11459 /* If set, max_mp is the first pool_entry that has a lower
11460 constraint than the one we are trying to add. */
11461 Mnode * max_mp = NULL;
11462 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11465 /* If the minipool starts before the end of FIX->INSN then this FIX
11466 cannot be placed into the current pool. Furthermore, adding the
11467 new constant pool entry may cause the pool to start FIX_SIZE bytes
11468 earlier.  */
11469 if (minipool_vector_head &&
11470 (fix->address + get_attr_length (fix->insn)
11471 >= minipool_vector_head->max_address - fix->fix_size))
11474 /* Scan the pool to see if a constant with the same value has
11475 already been added. While we are doing this, also note the
11476 location where we must insert the constant if it doesn't already
11477 exist.  */
11478 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11480 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11481 && fix->mode == mp->mode
11482 && (GET_CODE (fix->value) != CODE_LABEL
11483 || (CODE_LABEL_NUMBER (fix->value)
11484 == CODE_LABEL_NUMBER (mp->value)))
11485 && rtx_equal_p (fix->value, mp->value))
11487 /* More than one fix references this entry. */
11489 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11492 /* Note the insertion point if necessary. */
11494 && mp->max_address > max_address)
11497 /* If we are inserting an 8-byte aligned quantity and
11498 we have not already found an insertion point, then
11499 make sure that all such 8-byte aligned quantities are
11500 placed at the start of the pool. */
11501 if (ARM_DOUBLEWORD_ALIGN
11503 && fix->fix_size >= 8
11504 && mp->fix_size < 8)
11507 max_address = mp->max_address;
11511 /* The value is not currently in the minipool, so we need to create
11512 a new entry for it. If MAX_MP is NULL, the entry will be put on
11513 the end of the list since the placement is less constrained than
11514 any existing entry. Otherwise, we insert the new fix before
11515 MAX_MP and, if necessary, adjust the constraints on the other
11516 list entries.  */
11518 mp->fix_size = fix->fix_size;
11519 mp->mode = fix->mode;
11520 mp->value = fix->value;
11522 /* Not yet required for a backwards ref. */
11523 mp->min_address = -65536;
11525 if (max_mp == NULL)
11527 mp->max_address = max_address;
11529 mp->prev = minipool_vector_tail;
11531 if (mp->prev == NULL)
11533 minipool_vector_head = mp;
11534 minipool_vector_label = gen_label_rtx ();
11537 mp->prev->next = mp;
11539 minipool_vector_tail = mp;
11543 if (max_address > max_mp->max_address - mp->fix_size)
11544 mp->max_address = max_mp->max_address - mp->fix_size;
11546 mp->max_address = max_address;
11549 mp->prev = max_mp->prev;
11551 if (mp->prev != NULL)
11552 mp->prev->next = mp;
11554 minipool_vector_head = mp;
11557 /* Save the new entry. */
11560 /* Scan over the preceding entries and adjust their addresses as
11561 required.  */
11562 while (mp->prev != NULL
11563 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11565 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11573 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11574 HOST_WIDE_INT min_address)
11576 HOST_WIDE_INT offset;
11578 /* The code below assumes these are different. */
11579 gcc_assert (mp != min_mp);
11581 if (min_mp == NULL)
11583 if (min_address > mp->min_address)
11584 mp->min_address = min_address;
11588 /* We will adjust this below if it is too loose. */
11589 mp->min_address = min_address;
11591 /* Unlink MP from its current position. Since min_mp is non-null,
11592 mp->next must be non-null. */
11593 mp->next->prev = mp->prev;
11594 if (mp->prev != NULL)
11595 mp->prev->next = mp->next;
11597 minipool_vector_head = mp->next;
11599 /* Reinsert it after MIN_MP. */
11601 mp->next = min_mp->next;
11603 if (mp->next != NULL)
11604 mp->next->prev = mp;
11606 minipool_vector_tail = mp;
11612 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11614 mp->offset = offset;
11615 if (mp->refcount > 0)
11616 offset += mp->fix_size;
11618 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11619 mp->next->min_address = mp->min_address + mp->fix_size;
11625 /* Add a constant to the minipool for a backward reference. Returns the
11626 node added or NULL if the constant will not fit in this pool.
11628 Note that the code for insertion for a backwards reference can be
11629 somewhat confusing because the calculated offsets for each fix do
11630 not take into account the size of the pool (which is still under
11631 construction).  */
11633 add_minipool_backward_ref (Mfix *fix)
11635 /* If set, min_mp is the last pool_entry that has a lower constraint
11636 than the one we are trying to add. */
11637 Mnode *min_mp = NULL;
11638 /* This can be negative, since it is only a constraint. */
11639 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11642 /* If we can't reach the current pool from this insn, or if we can't
11643 insert this entry at the end of the pool without pushing other
11644 fixes out of range, then we don't try. This ensures that we
11645 can't fail later on. */
11646 if (min_address >= minipool_barrier->address
11647 || (minipool_vector_tail->min_address + fix->fix_size
11648 >= minipool_barrier->address))
11651 /* Scan the pool to see if a constant with the same value has
11652 already been added. While we are doing this, also note the
11653 location where we must insert the constant if it doesn't already
11654 exist.  */
11655 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11657 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11658 && fix->mode == mp->mode
11659 && (GET_CODE (fix->value) != CODE_LABEL
11660 || (CODE_LABEL_NUMBER (fix->value)
11661 == CODE_LABEL_NUMBER (mp->value)))
11662 && rtx_equal_p (fix->value, mp->value)
11663 /* Check that there is enough slack to move this entry to the
11664 end of the table (this is conservative). */
11665 && (mp->max_address
11666 > (minipool_barrier->address
11667 + minipool_vector_tail->offset
11668 + minipool_vector_tail->fix_size)))
11671 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11674 if (min_mp != NULL)
11675 mp->min_address += fix->fix_size;
11678 /* Note the insertion point if necessary. */
11679 if (mp->min_address < min_address)
11681 /* For now, we do not allow the insertion of 8-byte alignment
11682 requiring nodes anywhere but at the start of the pool. */
11683 if (ARM_DOUBLEWORD_ALIGN
11684 && fix->fix_size >= 8 && mp->fix_size < 8)
11689 else if (mp->max_address
11690 < minipool_barrier->address + mp->offset + fix->fix_size)
11692 /* Inserting before this entry would push the fix beyond
11693 its maximum address (which can happen if we have
11694 re-located a forwards fix); force the new fix to come
11695 after it.  */
11696 if (ARM_DOUBLEWORD_ALIGN
11697 && fix->fix_size >= 8 && mp->fix_size < 8)
11702 min_address = mp->min_address + fix->fix_size;
11705 /* Do not insert a non-8-byte aligned quantity before 8-byte
11706 aligned quantities. */
11707 else if (ARM_DOUBLEWORD_ALIGN
11708 && fix->fix_size < 8
11709 && mp->fix_size >= 8)
11712 min_address = mp->min_address + fix->fix_size;
11717 /* We need to create a new entry. */
11719 mp->fix_size = fix->fix_size;
11720 mp->mode = fix->mode;
11721 mp->value = fix->value;
11723 mp->max_address = minipool_barrier->address + 65536;
11725 mp->min_address = min_address;
11727 if (min_mp == NULL)
11730 mp->next = minipool_vector_head;
11732 if (mp->next == NULL)
11734 minipool_vector_tail = mp;
11735 minipool_vector_label = gen_label_rtx ();
11738 mp->next->prev = mp;
11740 minipool_vector_head = mp;
11744 mp->next = min_mp->next;
11748 if (mp->next != NULL)
11749 mp->next->prev = mp;
11751 minipool_vector_tail = mp;
11754 /* Save the new entry. */
11762 /* Scan over the following entries and adjust their offsets. */
11763 while (mp->next != NULL)
11765 if (mp->next->min_address < mp->min_address + mp->fix_size)
11766 mp->next->min_address = mp->min_address + mp->fix_size;
11769 mp->next->offset = mp->offset + mp->fix_size;
11771 mp->next->offset = mp->offset;
11780 assign_minipool_offsets (Mfix *barrier)
11782 HOST_WIDE_INT offset = 0;
11785 minipool_barrier = barrier;
11787 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11789 mp->offset = offset;
11791 if (mp->refcount > 0)
11792 offset += mp->fix_size;
11796 /* Output the literal table */
11798 dump_minipool (rtx scan)
11804 if (ARM_DOUBLEWORD_ALIGN)
11805 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11806 if (mp->refcount > 0 && mp->fix_size >= 8)
11813 fprintf (dump_file,
11814 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11815 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11817 scan = emit_label_after (gen_label_rtx (), scan);
11818 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11819 scan = emit_label_after (minipool_vector_label, scan);
11821 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11823 if (mp->refcount > 0)
11827 fprintf (dump_file,
11828 ";; Offset %u, min %ld, max %ld ",
11829 (unsigned) mp->offset, (unsigned long) mp->min_address,
11830 (unsigned long) mp->max_address);
11831 arm_print_value (dump_file, mp->value);
11832 fputc ('\n', dump_file);
11835 switch (mp->fix_size)
11837 #ifdef HAVE_consttable_1
11839 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11843 #ifdef HAVE_consttable_2
11845 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11849 #ifdef HAVE_consttable_4
11851 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11855 #ifdef HAVE_consttable_8
11857 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11861 #ifdef HAVE_consttable_16
11863 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11868 gcc_unreachable ();
11876 minipool_vector_head = minipool_vector_tail = NULL;
11877 scan = emit_insn_after (gen_consttable_end (), scan);
11878 scan = emit_barrier_after (scan);
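/* Illustrative sketch (not from the original source) of the assembly a
   dumped pool produces; label names and values are made up:

	b	.L5		@ jump around the pool (from create_fix_barrier)
	.align	2
   .L4:				@ minipool_vector_label
	.word	0x12345678	@ a consttable_4 entry
	.word	0xdeadbeef
   .L5:
 */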
11881 /* Return the cost of forcibly inserting a barrier after INSN. */
11883 arm_barrier_cost (rtx insn)
11885 /* Basing the location of the pool on the loop depth is preferable,
11886 but at the moment, the basic block information seems to be
11887 corrupt by this stage of the compilation. */
11888 int base_cost = 50;
11889 rtx next = next_nonnote_insn (insn);
11891 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11892 base_cost -= 20;
11894 switch (GET_CODE (insn))
11897 /* It will always be better to place the table before the label, rather
11898 than after it.  */
11906 return base_cost - 10;
11909 return base_cost + 10;
11913 /* Find the best place in the insn stream in the range
11914 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11915 Create the barrier by inserting a jump and add a new fix entry for
11918 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11920 HOST_WIDE_INT count = 0;
11922 rtx from = fix->insn;
11923 /* The instruction after which we will insert the jump. */
11924 rtx selected = NULL;
11926 /* The address at which the jump instruction will be placed. */
11927 HOST_WIDE_INT selected_address;
11929 HOST_WIDE_INT max_count = max_address - fix->address;
11930 rtx label = gen_label_rtx ();
11932 selected_cost = arm_barrier_cost (from);
11933 selected_address = fix->address;
11935 while (from && count < max_count)
11940 /* This code shouldn't have been called if there was a natural barrier
11941 within range.  */
11942 gcc_assert (GET_CODE (from) != BARRIER);
11944 /* Count the length of this insn. */
11945 count += get_attr_length (from);
11947 /* If there is a jump table, add its length. */
11948 tmp = is_jump_table (from);
11951 count += get_jump_table_size (tmp);
11953 /* Jump tables aren't in a basic block, so base the cost on
11954 the dispatch insn. If we select this location, we will
11955 still put the pool after the table. */
11956 new_cost = arm_barrier_cost (from);
11958 if (count < max_count
11959 && (!selected || new_cost <= selected_cost))
11962 selected_cost = new_cost;
11963 selected_address = fix->address + count;
11966 /* Continue after the dispatch table. */
11967 from = NEXT_INSN (tmp);
11971 new_cost = arm_barrier_cost (from);
11973 if (count < max_count
11974 && (!selected || new_cost <= selected_cost))
11977 selected_cost = new_cost;
11978 selected_address = fix->address + count;
11981 from = NEXT_INSN (from);
11984 /* Make sure that we found a place to insert the jump. */
11985 gcc_assert (selected);
11987 /* Make sure we do not split a call and its corresponding
11988 CALL_ARG_LOCATION note. */
11989 if (CALL_P (selected))
11991 rtx next = NEXT_INSN (selected);
11992 if (next && NOTE_P (next)
11993 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11994 selected = next;
11997 /* Create a new JUMP_INSN that branches around a barrier. */
11998 from = emit_jump_insn_after (gen_jump (label), selected);
11999 JUMP_LABEL (from) = label;
12000 barrier = emit_barrier_after (from);
12001 emit_label_after (label, barrier);
12003 /* Create a minipool barrier entry for the new barrier. */
12004 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12005 new_fix->insn = barrier;
12006 new_fix->address = selected_address;
12007 new_fix->next = fix->next;
12008 fix->next = new_fix;
12013 /* Record that there is a natural barrier in the insn stream at
12014 ADDRESS.  */
12016 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12018 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12021 fix->address = address;
12024 if (minipool_fix_head != NULL)
12025 minipool_fix_tail->next = fix;
12027 minipool_fix_head = fix;
12029 minipool_fix_tail = fix;
12032 /* Record INSN, which will need fixing up to load a value from the
12033 minipool. ADDRESS is the offset of the insn since the start of the
12034 function; LOC is a pointer to the part of the insn which requires
12035 fixing; VALUE is the constant that must be loaded, which is of type
12036 MODE.  */
12038 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12039 enum machine_mode mode, rtx value)
12041 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12044 fix->address = address;
12047 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12048 fix->value = value;
12049 fix->forwards = get_attr_pool_range (insn);
12050 fix->backwards = get_attr_neg_pool_range (insn);
12051 fix->minipool = NULL;
12053 /* If an insn doesn't have a range defined for it, then it isn't
12054 expecting to be reworked by this code. Better to stop now than
12055 to generate duff assembly code. */
12056 gcc_assert (fix->forwards || fix->backwards);
12058 /* If an entry requires 8-byte alignment then assume all constant pools
12059 require 4 bytes of padding. Trying to do this later on a per-pool
12060 basis is awkward because existing pool entries have to be modified. */
12061 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12066 fprintf (dump_file,
12067 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12068 GET_MODE_NAME (mode),
12069 INSN_UID (insn), (unsigned long) address,
12070 -1 * (long)fix->backwards, (long)fix->forwards);
12071 arm_print_value (dump_file, fix->value);
12072 fprintf (dump_file, "\n");
12075 /* Add it to the chain of fixes. */
12078 if (minipool_fix_head != NULL)
12079 minipool_fix_tail->next = fix;
12081 minipool_fix_head = fix;
12083 minipool_fix_tail = fix;
12086 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12087 Returns the number of insns needed, or 99 if we don't know how to
12090 arm_const_double_inline_cost (rtx val)
12092 rtx lowpart, highpart;
12093 enum machine_mode mode;
12095 mode = GET_MODE (val);
12097 if (mode == VOIDmode)
12100 gcc_assert (GET_MODE_SIZE (mode) == 8);
12102 lowpart = gen_lowpart (SImode, val);
12103 highpart = gen_highpart_mode (SImode, mode, val);
12105 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12106 gcc_assert (GET_CODE (highpart) == CONST_INT);
12108 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12109 NULL_RTX, NULL_RTX, 0, 0)
12110 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12111 NULL_RTX, NULL_RTX, 0, 0));
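/* Illustrative example (not from the original source): for the DImode
   constant 0x000000FF000000FF each 32-bit half is the valid immediate
   0xFF, so arm_gen_constant counts one insn per half and the total
   cost is 2; halves that need several ops push the cost up and make a
   literal-pool load more attractive.  */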
12114 /* Return true if it is worthwhile to split a 64-bit constant into two
12115 32-bit operations. This is the case if optimizing for size, or
12116 if we have load delay slots, or if one 32-bit part can be done with
12117 a single data operation. */
12119 arm_const_double_by_parts (rtx val)
12121 enum machine_mode mode = GET_MODE (val);
12124 if (optimize_size || arm_ld_sched)
12127 if (mode == VOIDmode)
12130 part = gen_highpart_mode (SImode, mode, val);
12132 gcc_assert (GET_CODE (part) == CONST_INT);
12134 if (const_ok_for_arm (INTVAL (part))
12135 || const_ok_for_arm (~INTVAL (part)))
12138 part = gen_lowpart (SImode, val);
12140 gcc_assert (GET_CODE (part) == CONST_INT);
12142 if (const_ok_for_arm (INTVAL (part))
12143 || const_ok_for_arm (~INTVAL (part)))
12149 /* Return true if it is possible to inline both the high and low parts
12150 of a 64-bit constant into 32-bit data processing instructions. */
12152 arm_const_double_by_immediates (rtx val)
12154 enum machine_mode mode = GET_MODE (val);
12157 if (mode == VOIDmode)
12160 part = gen_highpart_mode (SImode, mode, val);
12162 gcc_assert (GET_CODE (part) == CONST_INT);
12164 if (!const_ok_for_arm (INTVAL (part)))
12167 part = gen_lowpart (SImode, val);
12169 gcc_assert (GET_CODE (part) == CONST_INT);
12171 if (!const_ok_for_arm (INTVAL (part)))
12177 /* Scan INSN and note any of its operands that need fixing.
12178 If DO_PUSHES is false we do not actually push any of the fixups
12179 needed. The function returns TRUE if any fixups were needed/pushed.
12180 This is used by arm_memory_load_p() which needs to know about loads
12181 of constants that will be converted into minipool loads. */
12183 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12185 bool result = false;
12188 extract_insn (insn);
12190 if (!constrain_operands (1))
12191 fatal_insn_not_found (insn);
12193 if (recog_data.n_alternatives == 0)
12194 return false;
12196 /* Fill in recog_op_alt with information about the constraints of
12197 this insn.  */
12198 preprocess_constraints ();
12200 for (opno = 0; opno < recog_data.n_operands; opno++)
12202 /* Things we need to fix can only occur in inputs. */
12203 if (recog_data.operand_type[opno] != OP_IN)
12206 /* If this alternative is a memory reference, then any mention
12207 of constants in this alternative is really to fool reload
12208 into allowing us to accept one there. We need to fix them up
12209 now so that we output the right code. */
12210 if (recog_op_alt[opno][which_alternative].memory_ok)
12212 rtx op = recog_data.operand[opno];
12214 if (CONSTANT_P (op))
12217 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12218 recog_data.operand_mode[opno], op);
12221 else if (GET_CODE (op) == MEM
12222 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12223 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12227 rtx cop = avoid_constant_pool_reference (op);
12229 /* Casting the address of something to a mode narrower
12230 than a word can cause avoid_constant_pool_reference()
12231 to return the pool reference itself. That's no good to
12232 us here. Let's just hope that we can use the
12233 constant pool value directly. */
12235 cop = get_pool_constant (XEXP (op, 0));
12237 push_minipool_fix (insn, address,
12238 recog_data.operand_loc[opno],
12239 recog_data.operand_mode[opno], cop);
12250 /* Convert instructions to their cc-clobbering variant if possible, since
12251 that allows us to use smaller encodings. */
12254 thumb2_reorg (void)
12259 INIT_REG_SET (&live);
12261 /* We are freeing block_for_insn in the toplev to keep compatibility
12262 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12263 compute_bb_for_insn ();
12270 COPY_REG_SET (&live, DF_LR_OUT (bb));
12271 df_simulate_initialize_backwards (bb, &live);
12272 FOR_BB_INSNS_REVERSE (bb, insn)
12274 if (NONJUMP_INSN_P (insn)
12275 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12277 rtx pat = PATTERN (insn);
12278 if (GET_CODE (pat) == SET
12279 && low_register_operand (XEXP (pat, 0), SImode)
12280 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12281 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12282 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12284 rtx dst = XEXP (pat, 0);
12285 rtx src = XEXP (pat, 1);
12286 rtx op0 = XEXP (src, 0);
12287 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12288 ? XEXP (src, 1) : NULL);
12290 if (rtx_equal_p (dst, op0)
12291 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12293 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12294 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12295 rtvec vec = gen_rtvec (2, pat, clobber);
12297 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12298 INSN_CODE (insn) = -1;
12300 /* We can also handle a commutative operation where the
12301 second operand matches the destination. */
12302 else if (op1 && rtx_equal_p (dst, op1))
12304 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12305 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12308 src = copy_rtx (src);
12309 XEXP (src, 0) = op1;
12310 XEXP (src, 1) = op0;
12311 pat = gen_rtx_SET (VOIDmode, dst, src);
12312 vec = gen_rtvec (2, pat, clobber);
12313 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12314 INSN_CODE (insn) = -1;
12319 if (NONDEBUG_INSN_P (insn))
12320 df_simulate_one_insn_backwards (bb, insn, &live);
12324 CLEAR_REG_SET (&live);
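/* Illustrative note (not from the original source): outside an IT
   block the only 16-bit Thumb-2 encoding of "and r0, r0, r1" is the
   flag-setting "ands r0, r0, r1". Rewriting the SET as a PARALLEL
   with a CC_REGNUM clobber is only valid when the backwards liveness
   scan above shows the condition codes are dead at that point, and it
   lets the assembler choose the 2-byte encoding.  */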
12327 /* GCC puts the pool in the wrong place for ARM, since we can only
12328 load addresses a limited distance around the pc. We do some
12329 special munging to move the constant pool values to the correct
12330 point in the code. */
12335 HOST_WIDE_INT address = 0;
12341 minipool_fix_head = minipool_fix_tail = NULL;
12343 /* The first insn must always be a note, or the code below won't
12344 scan it properly. */
12345 insn = get_insns ();
12346 gcc_assert (GET_CODE (insn) == NOTE);
12349 /* Scan all the insns and record the operands that will need fixing. */
12350 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12352 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12353 && (arm_cirrus_insn_p (insn)
12354 || GET_CODE (insn) == JUMP_INSN
12355 || arm_memory_load_p (insn)))
12356 cirrus_reorg (insn);
12358 if (GET_CODE (insn) == BARRIER)
12359 push_minipool_barrier (insn, address);
12360 else if (INSN_P (insn))
12364 note_invalid_constants (insn, address, true);
12365 address += get_attr_length (insn);
12367 /* If the insn is a vector jump, add the size of the table
12368 and skip the table. */
12369 if ((table = is_jump_table (insn)) != NULL)
12371 address += get_jump_table_size (table);
12377 fix = minipool_fix_head;
12379 /* Now scan the fixups and perform the required changes. */
12384 Mfix * last_added_fix;
12385 Mfix * last_barrier = NULL;
12388 /* Skip any further barriers before the next fix. */
12389 while (fix && GET_CODE (fix->insn) == BARRIER)
12392 /* No more fixes. */
12396 last_added_fix = NULL;
12398 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12400 if (GET_CODE (ftmp->insn) == BARRIER)
12402 if (ftmp->address >= minipool_vector_head->max_address)
12403 break;
12405 last_barrier = ftmp;
12407 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12410 last_added_fix = ftmp; /* Keep track of the last fix added. */
12413 /* If we found a barrier, drop back to that; any fixes that we
12414 could have reached but come after the barrier will now go in
12415 the next mini-pool. */
12416 if (last_barrier != NULL)
12418 /* Reduce the refcount for those fixes that won't go into this
12419 pool after all.  */
12420 for (fdel = last_barrier->next;
12421 fdel && fdel != ftmp;
12424 fdel->minipool->refcount--;
12425 fdel->minipool = NULL;
12428 ftmp = last_barrier;
12432 /* ftmp is the first fix that we can't fit into this pool and
12433 there are no natural barriers that we could use. Insert a
12434 new barrier in the code somewhere between the previous
12435 fix and this one, and arrange to jump around it. */
12436 HOST_WIDE_INT max_address;
12438 /* The last item on the list of fixes must be a barrier, so
12439 we can never run off the end of the list of fixes without
12440 last_barrier being set. */
12443 max_address = minipool_vector_head->max_address;
12444 /* Check that there isn't another fix that is in range that
12445 we couldn't fit into this pool because the pool was
12446 already too large: we need to put the pool before such an
12447 instruction. The pool itself may come just after the
12448 fix because create_fix_barrier also allows space for a
12449 jump instruction. */
12450 if (ftmp->address < max_address)
12451 max_address = ftmp->address + 1;
12453 last_barrier = create_fix_barrier (last_added_fix, max_address);
12456 assign_minipool_offsets (last_barrier);
12460 if (GET_CODE (ftmp->insn) != BARRIER
12461 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12468 /* Scan over the fixes we have identified for this pool, fixing them
12469 up and adding the constants to the pool itself. */
12470 for (this_fix = fix; this_fix && ftmp != this_fix;
12471 this_fix = this_fix->next)
12472 if (GET_CODE (this_fix->insn) != BARRIER)
12475 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12476 minipool_vector_label),
12477 this_fix->minipool->offset);
12478 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12481 dump_minipool (last_barrier->insn);
12485 /* From now on we must synthesize any constants that we can't handle
12486 directly. This can happen if the RTL gets split during final
12487 instruction generation. */
12488 after_arm_reorg = 1;
12490 /* Free the minipool memory. */
12491 obstack_free (&minipool_obstack, minipool_startobj);
12494 /* Routines to output assembly language. */
12496 /* If the rtx is the correct value then return the string of the number.
12497 In this way we can ensure that valid double constants are generated even
12498 when cross compiling. */
12500 fp_immediate_constant (rtx x)
12505 if (!fp_consts_inited)
12508 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12509 for (i = 0; i < 8; i++)
12510 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12511 return strings_fp[i];
12513 gcc_unreachable ();
12516 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12517 static const char *
12518 fp_const_from_val (REAL_VALUE_TYPE *r)
12522 if (!fp_consts_inited)
12525 for (i = 0; i < 8; i++)
12526 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12527 return strings_fp[i];
12529 gcc_unreachable ();
12532 /* Output the operands of a LDM/STM instruction to STREAM.
12533 MASK is the ARM register set mask of which only bits 0-15 are important.
12534 REG is the base register, either the frame pointer or the stack pointer,
12535 INSTR is the possibly suffixed load or store instruction.
12536 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12539 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12540 unsigned long mask, int rfe)
12543 bool not_first = FALSE;
12545 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12546 fputc ('\t', stream);
12547 asm_fprintf (stream, instr, reg);
12548 fputc ('{', stream);
12550 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12551 if (mask & (1 << i))
12554 fprintf (stream, ", ");
12556 asm_fprintf (stream, "%r", i);
12561 fprintf (stream, "}^\n");
12563 fprintf (stream, "}\n");
12567 /* Output a FLDMD instruction to STREAM.
12568 BASE is the register containing the address.
12569 REG and COUNT specify the register range.
12570 Extra registers may be added to avoid hardware bugs.
12572 We output FLDMD even for ARMv5 VFP implementations. Although
12573 FLDMD is technically not supported until ARMv6, it is believed
12574 that all VFP implementations support its use in this context. */
12577 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12581 /* Workaround ARM10 VFPr1 bug. */
12582 if (count == 2 && !arm_arch6)
12589 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12590 load into multiple parts if we have to handle more than 16 registers. */
12593 vfp_output_fldmd (stream, base, reg, 16);
12594 vfp_output_fldmd (stream, base, reg + 16, count - 16);
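/* Illustrative note (not from the original source): a 20-register
   request (d0-d19) is emitted as FLDMD of d0-d15 followed by FLDMD of
   d16-d19; the base register is written back by the first transfer,
   so the second one continues at the correct address.  */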
12598 fputc ('\t', stream);
12599 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12601 for (i = reg; i < reg + count; i++)
12604 fputs (", ", stream);
12605 asm_fprintf (stream, "d%d", i);
12607 fputs ("}\n", stream);
12612 /* Output the assembly for a store multiple. */
12615 vfp_output_fstmd (rtx * operands)
12622 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12623 p = strlen (pattern);
12625 gcc_assert (GET_CODE (operands[1]) == REG);
12627 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12628 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12630 p += sprintf (&pattern[p], ", d%d", base + i);
12632 strcpy (&pattern[p], "}");
12634 output_asm_insn (pattern, operands);
12639 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12640 number of bytes pushed. */
12643 vfp_emit_fstmd (int base_reg, int count)
12650 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12651 register pairs are stored by a store multiple insn. We avoid this
12652 by pushing an extra pair. */
12653 if (count == 2 && !arm_arch6)
12655 if (base_reg == LAST_VFP_REGNUM - 3)
12660 /* FSTMD may not store more than 16 doubleword registers at once. Split
12661 larger stores into multiple parts (up to a maximum of two, in
12662 practice).  */
12666 /* NOTE: base_reg is an internal register number, so each D register
12667 counts as 2.  */
12668 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12669 saved += vfp_emit_fstmd (base_reg, 16);
12673 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12674 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12676 reg = gen_rtx_REG (DFmode, base_reg);
12679 XVECEXP (par, 0, 0)
12680 = gen_rtx_SET (VOIDmode,
12683 gen_rtx_PRE_MODIFY (Pmode,
12686 (stack_pointer_rtx,
12689 gen_rtx_UNSPEC (BLKmode,
12690 gen_rtvec (1, reg),
12691 UNSPEC_PUSH_MULT));
12693 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12694 plus_constant (stack_pointer_rtx, -(count * 8)));
12695 RTX_FRAME_RELATED_P (tmp) = 1;
12696 XVECEXP (dwarf, 0, 0) = tmp;
12698 tmp = gen_rtx_SET (VOIDmode,
12699 gen_frame_mem (DFmode, stack_pointer_rtx),
12701 RTX_FRAME_RELATED_P (tmp) = 1;
12702 XVECEXP (dwarf, 0, 1) = tmp;
12704 for (i = 1; i < count; i++)
12706 reg = gen_rtx_REG (DFmode, base_reg);
12708 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12710 tmp = gen_rtx_SET (VOIDmode,
12711 gen_frame_mem (DFmode,
12712 plus_constant (stack_pointer_rtx,
12715 RTX_FRAME_RELATED_P (tmp) = 1;
12716 XVECEXP (dwarf, 0, i + 1) = tmp;
12719 par = emit_insn (par);
12720 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12721 RTX_FRAME_RELATED_P (par) = 1;
12726 /* Emit a call instruction with pattern PAT. ADDR is the address of
12727 the call target. */
12730 arm_emit_call_insn (rtx pat, rtx addr)
12734 insn = emit_call_insn (pat);
12736 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12737 If the call might use such an entry, add a use of the PIC register
12738 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12739 if (TARGET_VXWORKS_RTP
12741 && GET_CODE (addr) == SYMBOL_REF
12742 && (SYMBOL_REF_DECL (addr)
12743 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12744 : !SYMBOL_REF_LOCAL_P (addr)))
12746 require_pic_register ();
12747 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12751 /* Output a 'call' insn. */
12753 output_call (rtx *operands)
12755 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12757 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12758 if (REGNO (operands[0]) == LR_REGNUM)
12760 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12761 output_asm_insn ("mov%?\t%0, %|lr", operands);
12764 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12766 if (TARGET_INTERWORK || arm_arch4t)
12767 output_asm_insn ("bx%?\t%0", operands);
12769 output_asm_insn ("mov%?\t%|pc, %0", operands);
12774 /* Output a 'call' insn that is a reference in memory. This is
12775 disabled for ARMv5 and we prefer a blx instead because otherwise
12776 there's a significant performance overhead. */
12778 output_call_mem (rtx *operands)
12780 gcc_assert (!arm_arch5);
12781 if (TARGET_INTERWORK)
12783 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12784 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12785 output_asm_insn ("bx%?\t%|ip", operands);
12787 else if (regno_use_in (LR_REGNUM, operands[0]))
12789 /* LR is used in the memory address. We load the address in the
12790 first instruction. It's safe to use IP as the target of the
12791 load since the call will kill it anyway. */
12792 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12793 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12795 output_asm_insn ("bx%?\t%|ip", operands);
12797 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12801 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12802 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12809 /* Output a move from arm registers to an fpa register.
12810 OPERANDS[0] is an fpa register.
12811 OPERANDS[1] is the first register of an arm register pair. */
12813 output_mov_long_double_fpa_from_arm (rtx *operands)
12815 int arm_reg0 = REGNO (operands[1]);
12818 gcc_assert (arm_reg0 != IP_REGNUM);
12820 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12821 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12822 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12824 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12825 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12830 /* Output a move from an fpa register to arm registers.
12831 OPERANDS[0] is the first register of an arm register pair.
12832 OPERANDS[1] is an fpa register. */
12834 output_mov_long_double_arm_from_fpa (rtx *operands)
12836 int arm_reg0 = REGNO (operands[0]);
12839 gcc_assert (arm_reg0 != IP_REGNUM);
12841 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12842 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12843 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12845 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12846 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12850 /* Output a move from arm registers to arm registers of a long double.
12851 OPERANDS[0] is the destination.
12852 OPERANDS[1] is the source. */
12854 output_mov_long_double_arm_from_arm (rtx *operands)
12856 /* We have to be careful here because the two might overlap. */
12857 int dest_start = REGNO (operands[0]);
12858 int src_start = REGNO (operands[1]);
12862 if (dest_start < src_start)
12864 for (i = 0; i < 3; i++)
12866 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12867 ops[1] = gen_rtx_REG (SImode, src_start + i);
12868 output_asm_insn ("mov%?\t%0, %1", ops);
12873 for (i = 2; i >= 0; i--)
12875 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12876 ops[1] = gen_rtx_REG (SImode, src_start + i);
12877 output_asm_insn ("mov%?\t%0, %1", ops);
12885 arm_emit_movpair (rtx dest, rtx src)
12887 /* If the src is an immediate, simplify it. */
12888 if (CONST_INT_P (src))
12890 HOST_WIDE_INT val = INTVAL (src);
12891 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12892 if ((val >> 16) & 0x0000ffff)
12893 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12895 GEN_INT ((val >> 16) & 0x0000ffff));
12898 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12899 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
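/* Illustrative example (not from the original source): on a target
   with MOVW/MOVT, arm_emit_movpair (r0, GEN_INT (0x12345678)) emits
   movw r0, #0x5678 (the SET of the low half) followed by
   movt r0, #0x1234 (the ZERO_EXTRACT of the top 16 bits); a value
   such as 0x5678 needs only the movw.  */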
12902 /* Output a move from arm registers to an fpa register.
12903 OPERANDS[0] is an fpa register.
12904 OPERANDS[1] is the first register of an arm register pair. */
12906 output_mov_double_fpa_from_arm (rtx *operands)
12908 int arm_reg0 = REGNO (operands[1]);
12911 gcc_assert (arm_reg0 != IP_REGNUM);
12913 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12914 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12915 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12916 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12920 /* Output a move from an fpa register to arm registers.
12921 OPERANDS[0] is the first register of an arm register pair.
12922 OPERANDS[1] is an fpa register. */
12924 output_mov_double_arm_from_fpa (rtx *operands)
12926 int arm_reg0 = REGNO (operands[0]);
12929 gcc_assert (arm_reg0 != IP_REGNUM);
12931 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12932 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12933 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12934 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12938 /* Output a move between double words. It must be REG<-MEM
12939 or MEM<-REG.  */
12941 output_move_double (rtx *operands)
12943 enum rtx_code code0 = GET_CODE (operands[0]);
12944 enum rtx_code code1 = GET_CODE (operands[1]);
12949 unsigned int reg0 = REGNO (operands[0]);
12951 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12953 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12955 switch (GET_CODE (XEXP (operands[1], 0)))
12959 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12960 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12962 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12966 gcc_assert (TARGET_LDRD);
12967 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12972 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12974 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12979 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12981 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12985 gcc_assert (TARGET_LDRD);
12986 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12991 /* Autoincrement addressing modes should never have overlapping
12992 base and destination registers, and overlapping index registers
12993 are already prohibited, so this doesn't need to worry about
12994 fix_cm3_ldrd.  */
12995 otherops[0] = operands[0];
12996 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12997 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12999 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13001 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13003 /* Registers overlap so split out the increment. */
13004 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13005 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13009 /* Use a single insn if we can.
13010 FIXME: IWMMXT allows offsets larger than ldrd can
13011 handle, fix these up with a pair of ldr. */
13013 || GET_CODE (otherops[2]) != CONST_INT
13014 || (INTVAL (otherops[2]) > -256
13015 && INTVAL (otherops[2]) < 256))
13016 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13019 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13026 /* Use a single insn if we can.
13027 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13028 fix these up with a pair of ldr. */
13030 || GET_CODE (otherops[2]) != CONST_INT
13031 || (INTVAL (otherops[2]) > -256
13032 && INTVAL (otherops[2]) < 256))
13033 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13036 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13037 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13044 /* We might be able to use ldrd %0, %1 here. However the range is
13045 different to ldr/adr, and it is broken on some ARMv7-M
13046 implementations. */
13047 /* Use the second register of the pair to avoid problematic
13049 otherops[1] = operands[1];
13050 output_asm_insn ("adr%?\t%0, %1", otherops);
13051 operands[1] = otherops[0];
13053 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13055 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13058 /* ??? This needs checking for thumb2. */
13060 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13061 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13063 otherops[0] = operands[0];
13064 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13065 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13067 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13069 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13071 switch ((int) INTVAL (otherops[2]))
13074 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13079 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13084 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13088 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13089 operands[1] = otherops[0];
13091 && (GET_CODE (otherops[2]) == REG
13093 || (GET_CODE (otherops[2]) == CONST_INT
13094 && INTVAL (otherops[2]) > -256
13095 && INTVAL (otherops[2]) < 256)))
13097 if (reg_overlap_mentioned_p (operands[0],
13101 /* Swap base and index registers over to
13102 avoid a conflict. */
13104 otherops[1] = otherops[2];
13107 /* If both registers conflict, it will usually
13108 have been fixed by a splitter. */
13109 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13110 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13112 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13113 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13117 otherops[0] = operands[0];
13118 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13123 if (GET_CODE (otherops[2]) == CONST_INT)
13125 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13126 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13128 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13131 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13134 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13137 return "ldr%(d%)\t%0, [%1]";
13139 return "ldm%(ia%)\t%1, %M0";
13143 otherops[1] = adjust_address (operands[1], SImode, 4);
13144 /* Take care of overlapping base/data reg. */
13145 if (reg_mentioned_p (operands[0], operands[1]))
13147 output_asm_insn ("ldr%?\t%0, %1", otherops);
13148 output_asm_insn ("ldr%?\t%0, %1", operands);
13152 output_asm_insn ("ldr%?\t%0, %1", operands);
13153 output_asm_insn ("ldr%?\t%0, %1", otherops);
13160 /* Constraints should ensure this. */
13161 gcc_assert (code0 == MEM && code1 == REG);
13162 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13164 switch (GET_CODE (XEXP (operands[0], 0)))
13168 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13170 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13174 gcc_assert (TARGET_LDRD);
13175 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13180 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13182 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13187 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13189 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13193 gcc_assert (TARGET_LDRD);
13194 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13199 otherops[0] = operands[1];
13200 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13201 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13203 /* IWMMXT allows offsets larger than ldrd can handle,
13204 fix these up with a pair of ldr. */
13206 && GET_CODE (otherops[2]) == CONST_INT
13207 && (INTVAL(otherops[2]) <= -256
13208 || INTVAL(otherops[2]) >= 256))
13210 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13212 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13213 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13217 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13218 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13221 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13222 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13224 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13228 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13229 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13231 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13234 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13240 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13246 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13251 && (GET_CODE (otherops[2]) == REG
13253 || (GET_CODE (otherops[2]) == CONST_INT
13254 && INTVAL (otherops[2]) > -256
13255 && INTVAL (otherops[2]) < 256)))
13257 otherops[0] = operands[1];
13258 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13259 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13265 otherops[0] = adjust_address (operands[0], SImode, 4);
13266 otherops[1] = operands[1];
13267 output_asm_insn ("str%?\t%1, %0", operands);
13268 output_asm_insn ("str%?\t%H1, %0", otherops);
13275 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13276 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13279 output_move_quad (rtx *operands)
13281 if (REG_P (operands[0]))
13283 /* Load, or reg->reg move. */
13285 if (MEM_P (operands[1]))
13287 switch (GET_CODE (XEXP (operands[1], 0)))
13290 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13295 output_asm_insn ("adr%?\t%0, %1", operands);
13296 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13300 gcc_unreachable ();
13308 gcc_assert (REG_P (operands[1]));
13310 dest = REGNO (operands[0]);
13311 src = REGNO (operands[1]);
13313 /* This seems pretty dumb, but hopefully GCC won't try to do it
13314 very often.  */
13316 for (i = 0; i < 4; i++)
13318 ops[0] = gen_rtx_REG (SImode, dest + i);
13319 ops[1] = gen_rtx_REG (SImode, src + i);
13320 output_asm_insn ("mov%?\t%0, %1", ops);
13323 for (i = 3; i >= 0; i--)
13325 ops[0] = gen_rtx_REG (SImode, dest + i);
13326 ops[1] = gen_rtx_REG (SImode, src + i);
13327 output_asm_insn ("mov%?\t%0, %1", ops);
13333 gcc_assert (MEM_P (operands[0]));
13334 gcc_assert (REG_P (operands[1]));
13335 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13337 switch (GET_CODE (XEXP (operands[0], 0)))
13340 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13344 gcc_unreachable ();
13351 /* Output a VFP load or store instruction. */
13354 output_move_vfp (rtx *operands)
13356 rtx reg, mem, addr, ops[2];
13357 int load = REG_P (operands[0]);
13358 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13359 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13362 enum machine_mode mode;
13364 reg = operands[!load];
13365 mem = operands[load];
13367 mode = GET_MODE (reg);
13369 gcc_assert (REG_P (reg));
13370 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13371 gcc_assert (mode == SFmode
13375 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13376 gcc_assert (MEM_P (mem));
13378 addr = XEXP (mem, 0);
13380 switch (GET_CODE (addr))
13383 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13384 ops[0] = XEXP (addr, 0);
13389 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13390 ops[0] = XEXP (addr, 0);
13395 templ = "f%s%c%%?\t%%%s0, %%1%s";
13401 sprintf (buff, templ,
13402 load ? "ld" : "st",
13405 integer_p ? "\t%@ int" : "");
13406 output_asm_insn (buff, ops);
13411 /* Output a Neon quad-word load or store, or a load or store for
13412 larger structure modes.
13414 WARNING: The ordering of elements is weird in big-endian mode,
13415 because we use VSTM, as required by the EABI. GCC RTL defines
13416 element ordering based on in-memory order. This can differ
13417 from the architectural ordering of elements within a NEON register.
13418 The intrinsics defined in arm_neon.h use the NEON register element
13419 ordering, not the GCC RTL element ordering.
13421 For example, the in-memory ordering of a big-endian quadword
13422 vector with 16-bit elements when stored from register pair {d0,d1}
13423 will be (lowest address first, d0[N] is NEON register element N):
13425 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13427 When necessary, quadword registers (dN, dN+1) are moved to ARM
13428 registers from rN in the order:
13430 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13432 So that STM/LDM can be used on vectors in ARM registers, and the
13433 same memory layout will result as if VSTM/VLDM were used. */
13436 output_move_neon (rtx *operands)
13438 rtx reg, mem, addr, ops[2];
13439 int regno, load = REG_P (operands[0]);
13442 enum machine_mode mode;
13444 reg = operands[!load];
13445 mem = operands[load];
13447 mode = GET_MODE (reg);
13449 gcc_assert (REG_P (reg));
13450 regno = REGNO (reg);
13451 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13452 || NEON_REGNO_OK_FOR_QUAD (regno));
13453 gcc_assert (VALID_NEON_DREG_MODE (mode)
13454 || VALID_NEON_QREG_MODE (mode)
13455 || VALID_NEON_STRUCT_MODE (mode));
13456 gcc_assert (MEM_P (mem));
13458 addr = XEXP (mem, 0);
13460 /* Strip off const from addresses like (const (plus (...))). */
13461 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13462 addr = XEXP (addr, 0);
13464 switch (GET_CODE (addr))
13467 templ = "v%smia%%?\t%%0!, %%h1";
13468 ops[0] = XEXP (addr, 0);
13473 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13474 templ = "v%smdb%%?\t%%0!, %%h1";
13475 ops[0] = XEXP (addr, 0);
13480 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13481 gcc_unreachable ();
13486 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13489 for (i = 0; i < nregs; i++)
13491 /* We're only using DImode here because it's a convenient size. */
13492 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13493 ops[1] = adjust_address (mem, DImode, 8 * i);
13494 if (reg_overlap_mentioned_p (ops[0], mem))
13496 gcc_assert (overlap == -1);
13501 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13502 output_asm_insn (buff, ops);
13507 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13508 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13509 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13510 output_asm_insn (buff, ops);
13517 templ = "v%smia%%?\t%%m0, %%h1";
13522 sprintf (buff, templ, load ? "ld" : "st");
13523 output_asm_insn (buff, ops);
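/* For instance (illustrative only): a quad-register load through a
   POST_INC address expands the first template above to
   "vldmia%?\t%0!, %h1", and the trailing LABEL_REF/PLUS case expands
   to "vldmia%?\t%m0, %h1" -- in both cases a load-multiple of the D
   registers that make up the value.  */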
13528 /* Compute and return the length of neon_mov<mode>, where <mode> is
13529 one of VSTRUCT modes: EI, OI, CI or XI. */
13531 arm_attr_length_move_neon (rtx insn)
13533 rtx reg, mem, addr;
13535 enum machine_mode mode;
13537 extract_insn_cached (insn);
13539 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13541 mode = GET_MODE (recog_data.operand[0]);
13552 gcc_unreachable ();
13556 load = REG_P (recog_data.operand[0]);
13557 reg = recog_data.operand[!load];
13558 mem = recog_data.operand[load];
13560 gcc_assert (MEM_P (mem));
13562 mode = GET_MODE (reg);
13563 addr = XEXP (mem, 0);
13565 /* Strip off const from addresses like (const (plus (...))). */
13566 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13567 addr = XEXP (addr, 0);
13569 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13571 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13578 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13582 arm_address_offset_is_imm (rtx insn)
13586 extract_insn_cached (insn);
13588 if (REG_P (recog_data.operand[0]))
13591 mem = recog_data.operand[0];
13593 gcc_assert (MEM_P (mem));
13595 addr = XEXP (mem, 0);
13597 if (GET_CODE (addr) == REG
13598 || (GET_CODE (addr) == PLUS
13599 && GET_CODE (XEXP (addr, 0)) == REG
13600 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13606 /* Output an ADD r, s, #n where n may be too big for one instruction.
13607 If adding zero to one register, output nothing. */
13609 output_add_immediate (rtx *operands)
13611 HOST_WIDE_INT n = INTVAL (operands[2]);
13613 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13616 output_multi_immediate (operands,
13617 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13620 output_multi_immediate (operands,
13621 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13628 /* Output a multiple immediate operation.
13629 OPERANDS is the vector of operands referred to in the output patterns.
13630 INSTR1 is the output pattern to use for the first constant.
13631 INSTR2 is the output pattern to use for subsequent constants.
13632 IMMED_OP is the index of the constant slot in OPERANDS.
13633 N is the constant value. */
13634 static const char *
13635 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13636 int immed_op, HOST_WIDE_INT n)
13638 #if HOST_BITS_PER_WIDE_INT > 32
13644 /* Quick and easy output. */
13645 operands[immed_op] = const0_rtx;
13646 output_asm_insn (instr1, operands);
13651 const char * instr = instr1;
13653 /* Note that n is never zero here (which would give no output). */
13654 for (i = 0; i < 32; i += 2)
13658 operands[immed_op] = GEN_INT (n & (255 << i));
13659 output_asm_insn (instr, operands);
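/* A standalone sketch of the loop above (illustrative, not part of
   the compiler): each iteration peels off one 8-bit chunk at an even
   bit position -- exactly what a single ARM data-processing immediate
   can encode -- and emits one ADD/SUB for it:

       unsigned int remaining = n, chunks = 0;
       int i;
       for (i = 0; i < 32; i += 2)
         if (remaining & (255u << i))
           {
             remaining &= ~(255u << i);
             chunks++;
           }

   So n == 0x00ff00ff needs two instructions and n == 0x01010101
   needs four.  */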
13669 /* Return the name of a shifter operation. */
13670 static const char *
13671 arm_shift_nmem (enum rtx_code code)
13676 return ARM_LSL_NAME;
13692 /* Return the appropriate ARM instruction for the operation code.
13693 The returned result should not be overwritten. OP is the rtx of the
13694 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13697 arithmetic_instr (rtx op, int shift_first_arg)
13699 switch (GET_CODE (op))
13705 return shift_first_arg ? "rsb" : "sub";
13720 return arm_shift_nmem (GET_CODE (op));
13723 gcc_unreachable ();
13727 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13728 for the operation code. The returned result should not be overwritten.
13729 OP is the rtx code of the shift.
13730 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13732 static const char *
13733 shift_op (rtx op, HOST_WIDE_INT *amountp)
13736 enum rtx_code code = GET_CODE (op);
13738 switch (GET_CODE (XEXP (op, 1)))
13746 *amountp = INTVAL (XEXP (op, 1));
13750 gcc_unreachable ();
13756 gcc_assert (*amountp != -1);
13757 *amountp = 32 - *amountp;
13760 /* Fall through. */
13766 mnem = arm_shift_nmem (code);
13770 /* We never have to worry about the amount being other than a
13771 power of 2, since this case can never be reloaded from a reg. */
13772 gcc_assert (*amountp != -1);
13773 *amountp = int_log2 (*amountp);
13774 return ARM_LSL_NAME;
13777 gcc_unreachable ();
13780 if (*amountp != -1)
13782 /* This is not 100% correct, but follows from the desire to merge
13783 multiplication by a power of 2 with the recognizer for a
13784 shift. >=32 is not a valid shift for "lsl", so we must try and
13785 output a shift that produces the correct arithmetical result.
13786 Using lsr #32 is identical except for the fact that the carry bit
13787 is not set correctly if we set the flags; but we never use the
13788 carry bit from such an operation, so we can ignore that. */
13789 if (code == ROTATERT)
13790 /* Rotate is just modulo 32. */
13792 else if (*amountp != (*amountp & 31))
13794 if (code == ASHIFT)
13799 /* Shifts of 0 are no-ops. */
13807 /* Obtain the shift from the POWER of two. */
13809 static HOST_WIDE_INT
13810 int_log2 (HOST_WIDE_INT power)
13812 HOST_WIDE_INT shift = 0;
13814 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13816 gcc_assert (shift <= 31);
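/* For example, int_log2 (8) returns 3 and int_log2 (0x80000000)
   returns 31.  More precisely this finds the least significant set
   bit, and for a zero argument the assertion above fires.  */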
13823 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13824 because /bin/as is horribly restrictive. The judgement about
13825 whether or not each character is 'printable' (and can be output as
13826 is) or not (and must be printed with an octal escape) must be made
13827 with reference to the *host* character set -- the situation is
13828 similar to that discussed in the comments above pp_c_char in
13829 c-pretty-print.c. */
13831 #define MAX_ASCII_LEN 51
13834 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13837 int len_so_far = 0;
13839 fputs ("\t.ascii\t\"", stream);
13841 for (i = 0; i < len; i++)
13845 if (len_so_far >= MAX_ASCII_LEN)
13847 fputs ("\"\n\t.ascii\t\"", stream);
13853 if (c == '\\' || c == '\"')
13855 putc ('\\', stream);
13863 fprintf (stream, "\\%03o", c);
13868 fputs ("\"\n", stream);
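/* As an illustration (assuming the elided default case emits
   printable characters unescaped): for the bytes 'h', 'i', '"', '\n'
   this emits

	.ascii	"hi\"\012"

   i.e. quote and backslash get a backslash escape, non-printable
   characters become three-digit octal escapes, and anything longer
   than MAX_ASCII_LEN is split across several .ascii directives.  */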
13871 /* Compute the register save mask for registers 0 through 12
13872 inclusive. This code is used by arm_compute_save_reg_mask. */
13874 static unsigned long
13875 arm_compute_save_reg0_reg12_mask (void)
13877 unsigned long func_type = arm_current_func_type ();
13878 unsigned long save_reg_mask = 0;
13881 if (IS_INTERRUPT (func_type))
13883 unsigned int max_reg;
13884 /* Interrupt functions must not corrupt any registers,
13885 even call clobbered ones. If this is a leaf function
13886 we can just examine the registers used by the RTL, but
13887 otherwise we have to assume that whatever function is
13888 called might clobber anything, and so we have to save
13889 all the call-clobbered registers as well. */
13890 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13891 /* FIQ handlers have registers r8 - r12 banked, so
13892 we only need to check r0 - r7. Normal ISRs only
13893 bank r14 and r15, so we must check up to r12.
13894 r13 is the stack pointer which is always preserved,
13895 so we do not need to consider it here. */
13900 for (reg = 0; reg <= max_reg; reg++)
13901 if (df_regs_ever_live_p (reg)
13902 || (! current_function_is_leaf && call_used_regs[reg]))
13903 save_reg_mask |= (1 << reg);
13905 /* Also save the pic base register if necessary. */
13907 && !TARGET_SINGLE_PIC_BASE
13908 && arm_pic_register != INVALID_REGNUM
13909 && crtl->uses_pic_offset_table)
13910 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13912 else if (IS_VOLATILE (func_type))
13914 /* For noreturn functions we historically omitted register saves
13915 altogether. However this really messes up debugging. As a
13916 compromise save just the frame pointers. Combined with the link
13917 register saved elsewhere this should be sufficient to get
13919 if (frame_pointer_needed)
13920 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13921 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13922 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13923 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13924 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13928 /* In the normal case we only need to save those registers
13929 which are call saved and which are used by this function. */
13930 for (reg = 0; reg <= 11; reg++)
13931 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13932 save_reg_mask |= (1 << reg);
13934 /* Handle the frame pointer as a special case. */
13935 if (frame_pointer_needed)
13936 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13938 /* If we aren't loading the PIC register,
13939 don't stack it even though it may be live. */
13941 && !TARGET_SINGLE_PIC_BASE
13942 && arm_pic_register != INVALID_REGNUM
13943 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13944 || crtl->uses_pic_offset_table))
13945 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13947 /* The prologue will copy SP into R0, so save it. */
13948 if (IS_STACKALIGN (func_type))
13949 save_reg_mask |= 1;
13952 /* Save registers so the exception handler can modify them. */
13953 if (crtl->calls_eh_return)
13959 reg = EH_RETURN_DATA_REGNO (i);
13960 if (reg == INVALID_REGNUM)
13962 save_reg_mask |= 1 << reg;
13966 return save_reg_mask;
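/* Two illustrative cases (a sketch, not an exhaustive statement): a
   normal function that uses r4 and r7 gets (1 << 4) | (1 << 7) here,
   while a non-leaf IRQ handler must also save the call-clobbered
   registers r0-r3 and ip, since the interrupted code may have live
   values in them.  */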
13970 /* Compute the number of bytes used to store the static chain register on the
13971 stack, above the stack frame. We need to know this accurately to get the
13972 alignment of the rest of the stack frame correct. */
13974 static int
13975 arm_compute_static_chain_stack_bytes (void)
13976 unsigned long func_type = arm_current_func_type ();
13977 int static_chain_stack_bytes = 0;
13979 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13980 && IS_NESTED (func_type)
13981 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13982 static_chain_stack_bytes = 4;
13984 return static_chain_stack_bytes;
13988 /* Compute a bit mask of which registers need to be
13989 saved on the stack for the current function.
13990 This is used by arm_get_frame_offsets, which may add extra registers. */
13992 static unsigned long
13993 arm_compute_save_reg_mask (void)
13995 unsigned int save_reg_mask = 0;
13996 unsigned long func_type = arm_current_func_type ();
13999 if (IS_NAKED (func_type))
14000 /* This should never really happen. */
14003 /* If we are creating a stack frame, then we must save the frame pointer,
14004 IP (which will hold the old stack pointer), LR and the PC. */
14005 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14007 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14010 | (1 << PC_REGNUM);
14012 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14014 /* Decide if we need to save the link register.
14015 Interrupt routines have their own banked link register,
14016 so they never need to save it.
14017 Otherwise if we do not use the link register we do not need to save
14018 it. If we are pushing other registers onto the stack however, we
14019 can save an instruction in the epilogue by pushing the link register
14020 now and then popping it back into the PC. This incurs extra memory
14021 accesses though, so we only do it when optimizing for size, and only
14022 if we know that we will not need a fancy return sequence. */
14023 if (df_regs_ever_live_p (LR_REGNUM)
14026 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14027 && !crtl->calls_eh_return))
14028 save_reg_mask |= 1 << LR_REGNUM;
14030 if (cfun->machine->lr_save_eliminated)
14031 save_reg_mask &= ~ (1 << LR_REGNUM);
14033 if (TARGET_REALLY_IWMMXT
14034 && ((bit_count (save_reg_mask)
14035 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14036 arm_compute_static_chain_stack_bytes ())
14039 /* The total number of registers that are going to be pushed
14040 onto the stack is odd. We need to ensure that the stack
14041 is 64-bit aligned before we start to save iWMMXt registers,
14042 and also before we start to create locals. (A local variable
14043 might be a double or long long which we will load/store using
14044 an iWMMXt instruction). Therefore we need to push another
14045 ARM register, so that the stack will be 64-bit aligned. We
14046 try to avoid using the arg registers (r0 - r3) as they might be
14047 used to pass values in a tail call. */
14048 for (reg = 4; reg <= 12; reg++)
14049 if ((save_reg_mask & (1 << reg)) == 0)
14053 save_reg_mask |= (1 << reg);
14056 cfun->machine->sibcall_blocked = 1;
14057 save_reg_mask |= (1 << 3);
14061 /* We may need to push an additional register for use initializing the
14062 PIC base register. */
14063 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14064 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14066 reg = thumb_find_work_register (1 << 4);
14067 if (!call_used_regs[reg])
14068 save_reg_mask |= (1 << reg);
14071 return save_reg_mask;
14075 /* Compute a bit mask of which registers need to be
14076 saved on the stack for the current function. */
14077 static unsigned long
14078 thumb1_compute_save_reg_mask (void)
14080 unsigned long mask;
14084 for (reg = 0; reg < 12; reg ++)
14085 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14089 && !TARGET_SINGLE_PIC_BASE
14090 && arm_pic_register != INVALID_REGNUM
14091 && crtl->uses_pic_offset_table)
14092 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14094 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14095 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14096 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14098 /* LR will also be pushed if any lo regs are pushed. */
14099 if (mask & 0xff || thumb_force_lr_save ())
14100 mask |= (1 << LR_REGNUM);
14102 /* Make sure we have a low work register if we need one.
14103 We will need one if we are going to push a high register,
14104 but we are not currently intending to push a low register. */
14105 if ((mask & 0xff) == 0
14106 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14108 /* Use thumb_find_work_register to choose which register
14109 we will use. If the register is live then we will
14110 have to push it. Use LAST_LO_REGNUM as our fallback
14111 choice for the register to select. */
14112 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14113 /* Make sure the register returned by thumb_find_work_register is
14114 not part of the return value. */
14115 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14116 reg = LAST_LO_REGNUM;
14118 if (! call_used_regs[reg])
14122 /* The 504 below is 8 bytes less than 512 because there are two possible
14123 alignment words. We can't tell here if they will be present or not so we
14124 have to play it safe and assume that they are. */
14125 if ((CALLER_INTERWORKING_SLOT_SIZE +
14126 ROUND_UP_WORD (get_frame_size ()) +
14127 crtl->outgoing_args_size) >= 504)
14129 /* This is the same as the code in thumb1_expand_prologue() which
14130 determines which register to use for stack decrement. */
14131 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14132 if (mask & (1 << reg))
14135 if (reg > LAST_LO_REGNUM)
14137 /* Make sure we have a register available for stack decrement. */
14138 mask |= 1 << LAST_LO_REGNUM;
14146 /* Return the number of bytes required to save VFP registers. */
14148 arm_get_vfp_saved_size (void)
14150 unsigned int regno;
14155 /* Space for saved VFP registers. */
14156 if (TARGET_HARD_FLOAT && TARGET_VFP)
14159 for (regno = FIRST_VFP_REGNUM;
14160 regno < LAST_VFP_REGNUM;
14163 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14164 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14168 /* Workaround ARM10 VFPr1 bug. */
14169 if (count == 2 && !arm_arch6)
14171 saved += count * 8;
14180 if (count == 2 && !arm_arch6)
14182 saved += count * 8;
14189 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14190 everything bar the final return instruction. */
14192 output_return_instruction (rtx operand, int really_return, int reverse)
14194 char conditional[10];
14197 unsigned long live_regs_mask;
14198 unsigned long func_type;
14199 arm_stack_offsets *offsets;
14201 func_type = arm_current_func_type ();
14203 if (IS_NAKED (func_type))
14206 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14208 /* If this function was declared non-returning, and we have
14209 found a tail call, then we have to trust that the called
14210 function won't return. */
14215 /* Otherwise, trap an attempted return by aborting. */
14217 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14219 assemble_external_libcall (ops[1]);
14220 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14226 gcc_assert (!cfun->calls_alloca || really_return);
14228 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14230 cfun->machine->return_used_this_function = 1;
14232 offsets = arm_get_frame_offsets ();
14233 live_regs_mask = offsets->saved_regs_mask;
14235 if (live_regs_mask)
14237 const char * return_reg;
14239 /* If we do not have any special requirements for function exit
14240 (e.g. interworking) then we can load the return address
14241 directly into the PC. Otherwise we must load it into LR. */
14243 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14244 return_reg = reg_names[PC_REGNUM];
14246 return_reg = reg_names[LR_REGNUM];
14248 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14250 /* There are three possible reasons for the IP register
14251 being saved. 1) a stack frame was created, in which case
14252 IP contains the old stack pointer, or 2) an ISR routine
14253 corrupted it, or 3) it was saved to align the stack on
14254 iWMMXt. In case 1, restore IP into SP, otherwise just
14256 if (frame_pointer_needed)
14258 live_regs_mask &= ~ (1 << IP_REGNUM);
14259 live_regs_mask |= (1 << SP_REGNUM);
14262 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14265 /* On some ARM architectures it is faster to use LDR rather than
14266 LDM to load a single register. On other architectures, the
14267 cost is the same. In 26 bit mode, or for exception handlers,
14268 we have to use LDM to load the PC so that the CPSR is also
14270 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14271 if (live_regs_mask == (1U << reg))
14274 if (reg <= LAST_ARM_REGNUM
14275 && (reg != LR_REGNUM
14277 || ! IS_INTERRUPT (func_type)))
14279 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14280 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14287 /* Generate the load multiple instruction to restore the
14288 registers. Note we can get here, even if
14289 frame_pointer_needed is true, but only if sp already
14290 points to the base of the saved core registers. */
14291 if (live_regs_mask & (1 << SP_REGNUM))
14293 unsigned HOST_WIDE_INT stack_adjust;
14295 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14296 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14298 if (stack_adjust && arm_arch5 && TARGET_ARM)
14299 if (TARGET_UNIFIED_ASM)
14300 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14302 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14305 /* If we can't use ldmib (SA110 bug),
14306 then try to pop r3 instead. */
14308 live_regs_mask |= 1 << 3;
14310 if (TARGET_UNIFIED_ASM)
14311 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14313 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14317 if (TARGET_UNIFIED_ASM)
14318 sprintf (instr, "pop%s\t{", conditional);
14320 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14322 p = instr + strlen (instr);
14324 for (reg = 0; reg <= SP_REGNUM; reg++)
14325 if (live_regs_mask & (1 << reg))
14327 int l = strlen (reg_names[reg]);
14333 memcpy (p, ", ", 2);
14337 memcpy (p, "%|", 2);
14338 memcpy (p + 2, reg_names[reg], l);
14342 if (live_regs_mask & (1 << LR_REGNUM))
14344 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14345 /* If returning from an interrupt, restore the CPSR. */
14346 if (IS_INTERRUPT (func_type))
14353 output_asm_insn (instr, & operand);
14355 /* See if we need to generate an extra instruction to
14356 perform the actual function return. */
14358 && func_type != ARM_FT_INTERWORKED
14359 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14361 /* The return has already been handled
14362 by loading the LR into the PC. */
14369 switch ((int) ARM_FUNC_TYPE (func_type))
14373 /* ??? This is wrong for unified assembly syntax. */
14374 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14377 case ARM_FT_INTERWORKED:
14378 sprintf (instr, "bx%s\t%%|lr", conditional);
14381 case ARM_FT_EXCEPTION:
14382 /* ??? This is wrong for unified assembly syntax. */
14383 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14387 /* Use bx if it's available. */
14388 if (arm_arch5 || arm_arch4t)
14389 sprintf (instr, "bx%s\t%%|lr", conditional);
14391 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14395 output_asm_insn (instr, & operand);
14401 /* Write the function name into the code section, directly preceding
14402 the function prologue.
14404 Code will be output similar to this:
14406 .ascii "arm_poke_function_name", 0
14409 .word 0xff000000 + (t1 - t0)
14410 arm_poke_function_name
14412 stmfd sp!, {fp, ip, lr, pc}
14415 When performing a stack backtrace, code can inspect the value
14416 of 'pc' stored at 'fp' + 0. If the trace function then looks
14417 at location pc - 12 and the top 8 bits are set, then we know
14418 that there is a function name embedded immediately preceding this
14419 location, and that its length is ((pc[-3]) & 0x00ffffff).
14421 We assume that pc is declared as a pointer to an unsigned long.
14423 It is of no benefit to output the function name if we are assembling
14424 a leaf function. These function types will not contain a stack
14425 backtrace structure, therefore it is not possible to determine the
14428 arm_poke_function_name (FILE *stream, const char *name)
14430 unsigned long alignlength;
14431 unsigned long length;
14434 length = strlen (name) + 1;
14435 alignlength = ROUND_UP_WORD (length);
14437 ASM_OUTPUT_ASCII (stream, name, length);
14438 ASM_OUTPUT_ALIGN (stream, 2);
14439 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14440 assemble_aligned_integer (UNITS_PER_WORD, x);
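/* Worked example: for NAME "foo", LENGTH is 4 (including the NUL),
   ALIGNLENGTH rounds up to 4, and the marker word emitted after the
   name is 0xff000000 + 4 == 0xff000004, so a backtracer reading
   pc[-3] sees the top byte set and a name length of 4.  */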
14443 /* Place some comments into the assembler stream
14444 describing the current function. */
14446 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14448 unsigned long func_type;
14452 thumb1_output_function_prologue (f, frame_size);
14456 /* Sanity check. */
14457 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14459 func_type = arm_current_func_type ();
14461 switch ((int) ARM_FUNC_TYPE (func_type))
14464 case ARM_FT_NORMAL:
14466 case ARM_FT_INTERWORKED:
14467 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14470 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14473 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14475 case ARM_FT_EXCEPTION:
14476 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14480 if (IS_NAKED (func_type))
14481 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14483 if (IS_VOLATILE (func_type))
14484 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14486 if (IS_NESTED (func_type))
14487 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14488 if (IS_STACKALIGN (func_type))
14489 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14491 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14493 crtl->args.pretend_args_size, frame_size);
14495 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14496 frame_pointer_needed,
14497 cfun->machine->uses_anonymous_args);
14499 if (cfun->machine->lr_save_eliminated)
14500 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14502 if (crtl->calls_eh_return)
14503 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14508 arm_output_epilogue (rtx sibling)
14511 unsigned long saved_regs_mask;
14512 unsigned long func_type;
14513 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14514 frame that is $fp + 4 for a non-variadic function. */
14515 int floats_offset = 0;
14517 FILE * f = asm_out_file;
14518 unsigned int lrm_count = 0;
14519 int really_return = (sibling == NULL);
14521 arm_stack_offsets *offsets;
14523 /* If we have already generated the return instruction
14524 then it is futile to generate anything else. */
14525 if (use_return_insn (FALSE, sibling)
14526 && (cfun->machine->return_used_this_function != 0))
14529 func_type = arm_current_func_type ();
14531 if (IS_NAKED (func_type))
14532 /* Naked functions don't have epilogues. */
14535 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14539 /* A volatile function should never return. Call abort. */
14540 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14541 assemble_external_libcall (op);
14542 output_asm_insn ("bl\t%a0", &op);
14547 /* If we are throwing an exception, then we really must be doing a
14548 return, so we can't tail-call. */
14549 gcc_assert (!crtl->calls_eh_return || really_return);
14551 offsets = arm_get_frame_offsets ();
14552 saved_regs_mask = offsets->saved_regs_mask;
14555 lrm_count = bit_count (saved_regs_mask);
14557 floats_offset = offsets->saved_args;
14558 /* Compute how far away the floats will be. */
14559 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14560 if (saved_regs_mask & (1 << reg))
14561 floats_offset += 4;
14563 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14565 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14566 int vfp_offset = offsets->frame;
14568 if (TARGET_FPA_EMU2)
14570 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14571 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14573 floats_offset += 12;
14574 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14575 reg, FP_REGNUM, floats_offset - vfp_offset);
14580 start_reg = LAST_FPA_REGNUM;
14582 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14584 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14586 floats_offset += 12;
14588 /* We can't unstack more than four registers at once. */
14589 if (start_reg - reg == 3)
14591 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14592 reg, FP_REGNUM, floats_offset - vfp_offset);
14593 start_reg = reg - 1;
14598 if (reg != start_reg)
14599 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14600 reg + 1, start_reg - reg,
14601 FP_REGNUM, floats_offset - vfp_offset);
14602 start_reg = reg - 1;
14606 /* Just in case the last register checked also needs unstacking. */
14607 if (reg != start_reg)
14608 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14609 reg + 1, start_reg - reg,
14610 FP_REGNUM, floats_offset - vfp_offset);
14613 if (TARGET_HARD_FLOAT && TARGET_VFP)
14617 /* The fldmd insns do not have base+offset addressing
14618 modes, so we use IP to hold the address. */
14619 saved_size = arm_get_vfp_saved_size ();
14621 if (saved_size > 0)
14623 floats_offset += saved_size;
14624 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14625 FP_REGNUM, floats_offset - vfp_offset);
14627 start_reg = FIRST_VFP_REGNUM;
14628 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14630 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14631 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14633 if (start_reg != reg)
14634 vfp_output_fldmd (f, IP_REGNUM,
14635 (start_reg - FIRST_VFP_REGNUM) / 2,
14636 (reg - start_reg) / 2);
14637 start_reg = reg + 2;
14640 if (start_reg != reg)
14641 vfp_output_fldmd (f, IP_REGNUM,
14642 (start_reg - FIRST_VFP_REGNUM) / 2,
14643 (reg - start_reg) / 2);
14648 /* The frame pointer is guaranteed to be non-double-word aligned.
14649 This is because it is set to (old_stack_pointer - 4) and the
14650 old_stack_pointer was double word aligned. Thus the offset to
14651 the iWMMXt registers to be loaded must also be non-double-word
14652 sized, so that the resultant address *is* double-word aligned.
14653 We can ignore floats_offset since that was already included in
14654 the live_regs_mask. */
14655 lrm_count += (lrm_count % 2 ? 2 : 1);
14657 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14658 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14660 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14661 reg, FP_REGNUM, lrm_count * 4);
14666 /* saved_regs_mask should contain the IP, which at the time of stack
14667 frame generation actually contains the old stack pointer. So a
14668 quick way to unwind the stack is just pop the IP register directly
14669 into the stack pointer. */
14670 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14671 saved_regs_mask &= ~ (1 << IP_REGNUM);
14672 saved_regs_mask |= (1 << SP_REGNUM);
14674 /* There are two registers left in saved_regs_mask - LR and PC. We
14675 only need to restore the LR register (the return address), but to
14676 save time we can load it directly into the PC, unless we need a
14677 special function exit sequence, or we are not really returning. */
14679 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14680 && !crtl->calls_eh_return)
14681 /* Delete the LR from the register mask, so that the LR on
14682 the stack is loaded into the PC in the register mask. */
14683 saved_regs_mask &= ~ (1 << LR_REGNUM);
14685 saved_regs_mask &= ~ (1 << PC_REGNUM);
14687 /* We must use SP as the base register, because SP is one of the
14688 registers being restored. If an interrupt or page fault
14689 happens in the ldm instruction, the SP might or might not
14690 have been restored. That would be bad, as then SP will no
14691 longer indicate the safe area of stack, and we can get stack
14692 corruption. Using SP as the base register means that it will
14693 be reset correctly to the original value, should an interrupt
14694 occur. If the stack pointer already points at the right
14695 place, then omit the subtraction. */
14696 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14697 || cfun->calls_alloca)
14698 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14699 4 * bit_count (saved_regs_mask));
14700 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14702 if (IS_INTERRUPT (func_type))
14703 /* Interrupt handlers will have pushed the
14704 IP onto the stack, so restore it now. */
14705 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14709 /* This branch is executed for ARM mode (non-apcs frames) and
14710 Thumb-2 mode. Frame layout is essentially the same for those
14711 cases, except that in ARM mode the frame pointer points to the
14712 first saved register, while in Thumb-2 mode the frame pointer points
14713 to the last saved register.
14715 It is possible to make the frame pointer point to the last saved
14716 register in both cases, and remove some conditionals below.
14717 That means that fp setup in prologue would be just "mov fp, sp"
14718 and sp restore in epilogue would be just "mov sp, fp", whereas
14719 now we have to use add/sub in those cases. However, the value
14720 of that would be marginal, as both mov and add/sub are 32-bit
14721 in ARM mode, and it would require extra conditionals
14722 in arm_expand_prologue to distinguish ARM-apcs-frame case
14723 (where frame pointer is required to point at first register)
14724 and ARM-non-apcs-frame. Therefore, such change is postponed
14725 until a real need arises. */
14726 unsigned HOST_WIDE_INT amount;
14728 /* Restore stack pointer if necessary. */
14729 if (TARGET_ARM && frame_pointer_needed)
14731 operands[0] = stack_pointer_rtx;
14732 operands[1] = hard_frame_pointer_rtx;
14734 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14735 output_add_immediate (operands);
14739 if (frame_pointer_needed)
14741 /* For Thumb-2 restore sp from the frame pointer.
14742 Operand restrictions mean we have to increment FP, then copy
14744 amount = offsets->locals_base - offsets->saved_regs;
14745 operands[0] = hard_frame_pointer_rtx;
14749 unsigned long count;
14750 operands[0] = stack_pointer_rtx;
14751 amount = offsets->outgoing_args - offsets->saved_regs;
14752 /* pop call clobbered registers if it avoids a
14753 separate stack adjustment. */
14754 count = offsets->saved_regs - offsets->saved_args;
14757 && !crtl->calls_eh_return
14758 && bit_count (saved_regs_mask) * 4 == count
14759 && !IS_INTERRUPT (func_type)
14760 && !crtl->tail_call_emit)
14762 unsigned long mask;
14763 /* Preserve return values, of any size. */
14764 mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
14766 mask &= ~saved_regs_mask;
14768 while (bit_count (mask) * 4 > amount)
14770 while ((mask & (1 << reg)) == 0)
14772 mask &= ~(1 << reg);
14774 if (bit_count (mask) * 4 == amount)
14775 {
14776 saved_regs_mask |= mask;
14783 operands[1] = operands[0];
14784 operands[2] = GEN_INT (amount);
14785 output_add_immediate (operands);
14787 if (frame_pointer_needed)
14788 asm_fprintf (f, "\tmov\t%r, %r\n",
14789 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14792 if (TARGET_FPA_EMU2)
14794 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14795 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14796 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14801 start_reg = FIRST_FPA_REGNUM;
14803 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14805 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14807 if (reg - start_reg == 3)
14809 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14810 start_reg, SP_REGNUM);
14811 start_reg = reg + 1;
14816 if (reg != start_reg)
14817 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14818 start_reg, reg - start_reg,
14821 start_reg = reg + 1;
14825 /* Just in case the last register checked also needs unstacking. */
14826 if (reg != start_reg)
14827 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14828 start_reg, reg - start_reg, SP_REGNUM);
14831 if (TARGET_HARD_FLOAT && TARGET_VFP)
14833 int end_reg = LAST_VFP_REGNUM + 1;
14835 /* Scan the registers in reverse order. We need to match
14836 any groupings made in the prologue and generate matching
14838 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14840 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14841 && (!df_regs_ever_live_p (reg + 1)
14842 || call_used_regs[reg + 1]))
14844 if (end_reg > reg + 2)
14845 vfp_output_fldmd (f, SP_REGNUM,
14846 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14847 (end_reg - (reg + 2)) / 2);
14851 if (end_reg > reg + 2)
14852 vfp_output_fldmd (f, SP_REGNUM, 0,
14853 (end_reg - (reg + 2)) / 2);
14857 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14858 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14859 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14861 /* If we can, restore the LR into the PC. */
14862 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14863 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14864 && !IS_STACKALIGN (func_type)
14866 && crtl->args.pretend_args_size == 0
14867 && saved_regs_mask & (1 << LR_REGNUM)
14868 && !crtl->calls_eh_return)
14870 saved_regs_mask &= ~ (1 << LR_REGNUM);
14871 saved_regs_mask |= (1 << PC_REGNUM);
14872 rfe = IS_INTERRUPT (func_type);
14877 /* Load the registers off the stack. If we only have one register
14878 to load use the LDR instruction - it is faster. For Thumb-2
14879 always use pop and the assembler will pick the best instruction. */
14880 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14881 && !IS_INTERRUPT (func_type))
14883 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14885 else if (saved_regs_mask)
14887 if (saved_regs_mask & (1 << SP_REGNUM))
14888 /* Note - write back to the stack register is not enabled
14889 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14890 in the list of registers and if we add writeback the
14891 instruction becomes UNPREDICTABLE. */
14892 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14894 else if (TARGET_ARM)
14895 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14898 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14901 if (crtl->args.pretend_args_size)
14903 /* Unwind the pre-pushed regs. */
14904 operands[0] = operands[1] = stack_pointer_rtx;
14905 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14906 output_add_immediate (operands);
14910 /* We may have already restored PC directly from the stack. */
14911 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14914 /* Stack adjustment for exception handler. */
14915 if (crtl->calls_eh_return)
14916 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14917 ARM_EH_STACKADJ_REGNUM);
14919 /* Generate the return instruction. */
14920 switch ((int) ARM_FUNC_TYPE (func_type))
14924 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14927 case ARM_FT_EXCEPTION:
14928 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14931 case ARM_FT_INTERWORKED:
14932 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14936 if (IS_STACKALIGN (func_type))
14938 /* See comment in arm_expand_prologue. */
14939 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14941 if (arm_arch5 || arm_arch4t)
14942 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14944 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14952 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14953 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14955 arm_stack_offsets *offsets;
14961 /* Emit any call-via-reg trampolines that are needed for v4t support
14962 of call_reg and call_value_reg type insns. */
14963 for (regno = 0; regno < LR_REGNUM; regno++)
14965 rtx label = cfun->machine->call_via[regno];
14969 switch_to_section (function_section (current_function_decl));
14970 targetm.asm_out.internal_label (asm_out_file, "L",
14971 CODE_LABEL_NUMBER (label));
14972 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14976 /* ??? Probably not safe to set this here, since it assumes that a
14977 function will be emitted as assembly immediately after we generate
14978 RTL for it. This does not happen for inline functions. */
14979 cfun->machine->return_used_this_function = 0;
14981 else /* TARGET_32BIT */
14983 /* We need to take into account any stack-frame rounding. */
14984 offsets = arm_get_frame_offsets ();
14986 gcc_assert (!use_return_insn (FALSE, NULL)
14987 || (cfun->machine->return_used_this_function != 0)
14988 || offsets->saved_regs == offsets->outgoing_args
14989 || frame_pointer_needed);
14991 /* Reset the ARM-specific per-function variables. */
14992 after_arm_reorg = 0;
14996 /* Generate and emit an insn that we will recognize as a push_multi.
14997 Unfortunately, since this insn does not reflect very well the actual
14998 semantics of the operation, we need to annotate the insn for the benefit
14999 of DWARF2 frame unwind information. */
15001 emit_multi_reg_push (unsigned long mask)
15004 int num_dwarf_regs;
15008 int dwarf_par_index;
15011 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15012 if (mask & (1 << i))
15015 gcc_assert (num_regs && num_regs <= 16);
15017 /* We don't record the PC in the dwarf frame information. */
15018 num_dwarf_regs = num_regs;
15019 if (mask & (1 << PC_REGNUM))
15022 /* For the body of the insn we are going to generate an UNSPEC in
15023 parallel with several USEs. This allows the insn to be recognized
15024 by the push_multi pattern in the arm.md file.
15026 The body of the insn looks something like this:
15029 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15030 (const_int:SI <num>)))
15031 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15037 For the frame note however, we try to be more explicit and actually
15038 show each register being stored into the stack frame, plus a (single)
15039 decrement of the stack pointer. We do it this way in order to be
15040 friendly to the stack unwinding code, which only wants to see a single
15041 stack decrement per instruction. The RTL we generate for the note looks
15042 something like this:
15045 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15046 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15047 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15048 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15052 FIXME: In an ideal world the PRE_MODIFY would not exist and
15053 instead we'd have a parallel expression detailing all
15054 the stores to the various memory addresses so that debug
15055 information is more up-to-date. Remember however while writing
15056 this to take care of the constraints with the push instruction.
15058 Note also that this has to be taken care of for the VFP registers.
15060 For more see PR43399. */
15062 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15063 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15064 dwarf_par_index = 1;
15066 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15068 if (mask & (1 << i))
15070 reg = gen_rtx_REG (SImode, i);
15072 XVECEXP (par, 0, 0)
15073 = gen_rtx_SET (VOIDmode,
15076 gen_rtx_PRE_MODIFY (Pmode,
15079 (stack_pointer_rtx,
15082 gen_rtx_UNSPEC (BLKmode,
15083 gen_rtvec (1, reg),
15084 UNSPEC_PUSH_MULT));
15086 if (i != PC_REGNUM)
15088 tmp = gen_rtx_SET (VOIDmode,
15089 gen_frame_mem (SImode, stack_pointer_rtx),
15091 RTX_FRAME_RELATED_P (tmp) = 1;
15092 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15100 for (j = 1, i++; j < num_regs; i++)
15102 if (mask & (1 << i))
15104 reg = gen_rtx_REG (SImode, i);
15106 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15108 if (i != PC_REGNUM)
15111 = gen_rtx_SET (VOIDmode,
15114 plus_constant (stack_pointer_rtx,
15117 RTX_FRAME_RELATED_P (tmp) = 1;
15118 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15125 par = emit_insn (par);
15127 tmp = gen_rtx_SET (VOIDmode,
15129 plus_constant (stack_pointer_rtx, -4 * num_regs));
15130 RTX_FRAME_RELATED_P (tmp) = 1;
15131 XVECEXP (dwarf, 0, 0) = tmp;
15133 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
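/* For instance, emit_multi_reg_push ((1 << 4) | (1 << 5)
   | (1 << LR_REGNUM)) emits a single push of {r4, r5, lr}, and the
   attached REG_FRAME_RELATED_EXPR note describes it to the unwinder
   as one 12-byte stack decrement plus three individual stores.  */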
15138 /* Calculate the size of the return value that is passed in registers. */
15140 arm_size_return_regs (void)
15142 enum machine_mode mode;
15144 if (crtl->return_rtx != 0)
15145 mode = GET_MODE (crtl->return_rtx);
15147 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15149 return GET_MODE_SIZE (mode);
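/* E.g. a function returning long long has a DImode return rtx, so
   this reports 8 bytes (r0 and r1); for a function returning void it
   reports GET_MODE_SIZE (VOIDmode), which is zero.  */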
15153 emit_sfm (int base_reg, int count)
15160 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15161 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15163 reg = gen_rtx_REG (XFmode, base_reg++);
15165 XVECEXP (par, 0, 0)
15166 = gen_rtx_SET (VOIDmode,
15169 gen_rtx_PRE_MODIFY (Pmode,
15172 (stack_pointer_rtx,
15175 gen_rtx_UNSPEC (BLKmode,
15176 gen_rtvec (1, reg),
15177 UNSPEC_PUSH_MULT));
15178 tmp = gen_rtx_SET (VOIDmode,
15179 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15180 RTX_FRAME_RELATED_P (tmp) = 1;
15181 XVECEXP (dwarf, 0, 1) = tmp;
15183 for (i = 1; i < count; i++)
15185 reg = gen_rtx_REG (XFmode, base_reg++);
15186 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15188 tmp = gen_rtx_SET (VOIDmode,
15189 gen_frame_mem (XFmode,
15190 plus_constant (stack_pointer_rtx,
15193 RTX_FRAME_RELATED_P (tmp) = 1;
15194 XVECEXP (dwarf, 0, i + 1) = tmp;
15197 tmp = gen_rtx_SET (VOIDmode,
15199 plus_constant (stack_pointer_rtx, -12 * count));
15201 RTX_FRAME_RELATED_P (tmp) = 1;
15202 XVECEXP (dwarf, 0, 0) = tmp;
15204 par = emit_insn (par);
15205 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15211 /* Return true if the current function needs to save/restore LR. */
15214 thumb_force_lr_save (void)
15216 return !cfun->machine->lr_save_eliminated
15217 && (!leaf_function_p ()
15218 || thumb_far_jump_used_p ()
15219 || df_regs_ever_live_p (LR_REGNUM));
15223 /* Return true if r3 is used by any of the tail call insns in the
15224 current function. */
15227 any_sibcall_uses_r3 (void)
15232 if (!crtl->tail_call_emit)
15234 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15235 if (e->flags & EDGE_SIBCALL)
15237 rtx call = BB_END (e->src);
15238 if (!CALL_P (call))
15239 call = prev_nonnote_nondebug_insn (call);
15240 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15241 if (find_regno_fusage (call, USE, 3))
15248 /* Compute the distance from register FROM to register TO.
15249 These can be the arg pointer (26), the soft frame pointer (25),
15250 the stack pointer (13) or the hard frame pointer (11).
15251 In thumb mode r7 is used as the soft frame pointer, if needed.
15252 Typical stack layout looks like this:
15254 old stack pointer -> | |
15257 | | saved arguments for
15258 | | vararg functions
15261 hard FP & arg pointer -> | | \
15269 soft frame pointer -> | | /
15274 locals base pointer -> | | /
15279 current stack pointer -> | | /
15282 For a given function some or all of these stack components
15283 may not be needed, giving rise to the possibility of
15284 eliminating some of the registers.
15286 The values returned by this function must reflect the behavior
15287 of arm_expand_prologue() and arm_compute_save_reg_mask().
15289 The sign of the number returned reflects the direction of stack
15290 growth, so the values are positive for all eliminations except
15291 from the soft frame pointer to the hard frame pointer.
15293 SFP may point just inside the local variables block to ensure correct
15297 /* Calculate stack offsets. These are used to calculate register elimination
15298 offsets and in prologue/epilogue code. Also calculates which registers
15299 should be saved. */
15301 static arm_stack_offsets *
15302 arm_get_frame_offsets (void)
15304 struct arm_stack_offsets *offsets;
15305 unsigned long func_type;
15309 HOST_WIDE_INT frame_size;
15312 offsets = &cfun->machine->stack_offsets;
15314 /* We need to know if we are a leaf function. Unfortunately, it
15315 is possible to be called after start_sequence has been called,
15316 which causes get_insns to return the insns for the sequence,
15317 not the function, which will cause leaf_function_p to return
15318 the incorrect result.
15319 To work around this, we cache the result: we only need
15320 to know about leaf functions once reload has completed, and the
15321 frame size cannot be changed after that time, so we can safely
15322 use the cached value. */
15324 if (reload_completed)
15327 /* Initially this is the size of the local variables. It will be translated
15328 into an offset once we have determined the size of preceding data. */
15329 frame_size = ROUND_UP_WORD (get_frame_size ());
15331 leaf = leaf_function_p ();
15333 /* Space for variadic functions. */
15334 offsets->saved_args = crtl->args.pretend_args_size;
15336 /* In Thumb mode this is incorrect, but never used. */
15337 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
15338 + arm_compute_static_chain_stack_bytes ();
15342 unsigned int regno;
15344 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15345 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15346 saved = core_saved;
15348 /* We know that SP will be doubleword aligned on entry, and we must
15349 preserve that condition at any subroutine call. We also require the
15350 soft frame pointer to be doubleword aligned. */
15352 if (TARGET_REALLY_IWMMXT)
15354 /* Check for the call-saved iWMMXt registers. */
15355 for (regno = FIRST_IWMMXT_REGNUM;
15356 regno <= LAST_IWMMXT_REGNUM;
15358 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15362 func_type = arm_current_func_type ();
15363 if (! IS_VOLATILE (func_type))
15365 /* Space for saved FPA registers. */
15366 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15367 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15370 /* Space for saved VFP registers. */
15371 if (TARGET_HARD_FLOAT && TARGET_VFP)
15372 saved += arm_get_vfp_saved_size ();
15375 else /* TARGET_THUMB1 */
15377 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15378 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15379 saved = core_saved;
15380 if (TARGET_BACKTRACE)
15384 /* Saved registers include the stack frame. */
15385 offsets->saved_regs = offsets->saved_args + saved
15386 + arm_compute_static_chain_stack_bytes ();
15387 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15388 /* A leaf function does not need any stack alignment if it has nothing
15390 if (leaf && frame_size == 0
15391 /* However if it calls alloca(), we have a dynamically allocated
15392 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15393 && ! cfun->calls_alloca)
15395 offsets->outgoing_args = offsets->soft_frame;
15396 offsets->locals_base = offsets->soft_frame;
15400 /* Ensure SFP has the correct alignment. */
15401 if (ARM_DOUBLEWORD_ALIGN
15402 && (offsets->soft_frame & 7))
15404 offsets->soft_frame += 4;
15405 /* Try to align stack by pushing an extra reg. Don't bother doing this
15406 when there is a stack frame as the alignment will be rolled into
15407 the normal stack adjustment. */
15408 if (frame_size + crtl->outgoing_args_size == 0)
15412 /* If it is safe to use r3, then do so. This sometimes
15413 generates better code on Thumb-2 by avoiding the need to
15414 use 32-bit push/pop instructions. */
15415 if (! any_sibcall_uses_r3 ()
15416 && arm_size_return_regs () <= 12
15417 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15422 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15424 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15433 offsets->saved_regs += 4;
15434 offsets->saved_regs_mask |= (1 << reg);
15439 offsets->locals_base = offsets->soft_frame + frame_size;
15440 offsets->outgoing_args = (offsets->locals_base
15441 + crtl->outgoing_args_size);
15443 if (ARM_DOUBLEWORD_ALIGN)
15445 /* Ensure SP remains doubleword aligned. */
15446 if (offsets->outgoing_args & 7)
15447 offsets->outgoing_args += 4;
15448 gcc_assert (!(offsets->outgoing_args & 7));
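/* A worked example of the layout computed above (a sketch, assuming
   TARGET_ARM, no frame pointer, no interworking slot and no static
   chain): a function that saves {r4, r5, r6, lr}, has 20 bytes of
   locals and no outgoing arguments gets saved_args = 0, frame = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 36 and
   outgoing_args padded from 36 up to 40 to keep SP doubleword
   aligned.  */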
15455 /* Calculate the relative offsets for the different stack pointers. Positive
15456 offsets are in the direction of stack growth. */
15459 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15461 arm_stack_offsets *offsets;
15463 offsets = arm_get_frame_offsets ();
15465 /* OK, now we have enough information to compute the distances.
15466 There must be an entry in these switch tables for each pair
15467 of registers in ELIMINABLE_REGS, even if some of the entries
15468 seem to be redundant or useless. */
15471 case ARG_POINTER_REGNUM:
15474 case THUMB_HARD_FRAME_POINTER_REGNUM:
15477 case FRAME_POINTER_REGNUM:
15478 /* This is the reverse of the soft frame pointer
15479 to hard frame pointer elimination below. */
15480 return offsets->soft_frame - offsets->saved_args;
15482 case ARM_HARD_FRAME_POINTER_REGNUM:
15483 /* This is only non-zero in the case where the static chain register
15484 is stored above the frame. */
15485 return offsets->frame - offsets->saved_args - 4;
15487 case STACK_POINTER_REGNUM:
15488 /* If nothing has been pushed on the stack at all
15489 then this will return -4. This *is* correct! */
15490 return offsets->outgoing_args - (offsets->saved_args + 4);
15493 gcc_unreachable ();
15495 gcc_unreachable ();
15497 case FRAME_POINTER_REGNUM:
15500 case THUMB_HARD_FRAME_POINTER_REGNUM:
15503 case ARM_HARD_FRAME_POINTER_REGNUM:
15504 /* The hard frame pointer points to the top entry in the
15505 stack frame. The soft frame pointer to the bottom entry
15506 in the stack frame. If there is no stack frame at all,
15507 then they are identical. */
15509 return offsets->frame - offsets->soft_frame;
15511 case STACK_POINTER_REGNUM:
15512 return offsets->outgoing_args - offsets->soft_frame;
15515 gcc_unreachable ();
15517 gcc_unreachable ();
15520 /* You cannot eliminate from the stack pointer.
15521 In theory you could eliminate from the hard frame
15522 pointer to the stack pointer, but this will never
15523 happen, since if a stack frame is not needed the
15524 hard frame pointer will never be used. */
15525 gcc_unreachable ();
15529 /* Given FROM and TO register numbers, say whether this elimination is
15530 allowed. Frame pointer elimination is automatically handled.
15532 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15533 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15534 pointer, we must eliminate FRAME_POINTER_REGNUM into
15535 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15536 ARG_POINTER_REGNUM. */
15539 arm_can_eliminate (const int from, const int to)
15541 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15542 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15543 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15544 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15548 /* Emit RTL to save coprocessor registers on function entry. Returns the
15549 number of bytes pushed. */
15552 arm_save_coproc_regs (void)
15554 int saved_size = 0;
15556 unsigned start_reg;
15559 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15560 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15562 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15563 insn = gen_rtx_MEM (V2SImode, insn);
15564 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15565 RTX_FRAME_RELATED_P (insn) = 1;
15569 /* Save any floating point call-saved registers used by this
15571 if (TARGET_FPA_EMU2)
15573 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15574 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15576 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15577 insn = gen_rtx_MEM (XFmode, insn);
15578 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15579 RTX_FRAME_RELATED_P (insn) = 1;
15585 start_reg = LAST_FPA_REGNUM;
15587 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15589 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15591 if (start_reg - reg == 3)
15593 insn = emit_sfm (reg, 4);
15594 RTX_FRAME_RELATED_P (insn) = 1;
15596 start_reg = reg - 1;
15601 if (start_reg != reg)
15603 insn = emit_sfm (reg + 1, start_reg - reg);
15604 RTX_FRAME_RELATED_P (insn) = 1;
15605 saved_size += (start_reg - reg) * 12;
15607 start_reg = reg - 1;
15611 if (start_reg != reg)
15613 insn = emit_sfm (reg + 1, start_reg - reg);
15614 saved_size += (start_reg - reg) * 12;
15615 RTX_FRAME_RELATED_P (insn) = 1;
15618 if (TARGET_HARD_FLOAT && TARGET_VFP)
15620 start_reg = FIRST_VFP_REGNUM;
15622 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15624 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15625 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15627 if (start_reg != reg)
15628 saved_size += vfp_emit_fstmd (start_reg,
15629 (reg - start_reg) / 2);
15630 start_reg = reg + 2;
15633 if (start_reg != reg)
15634 saved_size += vfp_emit_fstmd (start_reg,
15635 (reg - start_reg) / 2);
15641 /* Set the Thumb frame pointer from the stack pointer. */
15644 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15646 HOST_WIDE_INT amount;
15649 amount = offsets->outgoing_args - offsets->locals_base;
15651 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15652 stack_pointer_rtx, GEN_INT (amount)));
15655 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15656 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15657 expects the first two operands to be the same. */
15660 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15662 hard_frame_pointer_rtx));
15666 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15667 hard_frame_pointer_rtx,
15668 stack_pointer_rtx));
15670 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15671 plus_constant (stack_pointer_rtx, amount));
15672 RTX_FRAME_RELATED_P (dwarf) = 1;
15673 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15676 RTX_FRAME_RELATED_P (insn) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;
      rtx dwarf;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);
      /* Use a real rtvec rather than NULL_RTVEC so the rest of the
         compiler won't choke.  */
      dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
      dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
      insn = gen_movsi (r0, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (insn);
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
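
  /* Illustrative note (not from the source): the "bic r1, r0, #7"
     emitted above is the usual align-down idiom; in C terms it is
     aligned_sp = sp & ~(uintptr_t) 7, which clears the low three bits
     and so rounds sp down to an 8-byte boundary.  */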
  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          insn = emit_multi_reg_push (1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The Static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register.
               2. A slot on the stack above the frame.  (This only
                  works if the function is not a varargs function).
               3. Register r3, after pushing the argument registers
                  onto the stack.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (df_regs_ever_live_p (3) == false)
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                insn = emit_multi_reg_push
                  ((0xf0 >> (args_to_push / 4)) & 0xf);
              else
                insn = emit_insn
                  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (- args_to_push)));

              RTX_FRAME_RELATED_P (insn) = 1;

              saved_pretend_args = 1;
              fp_offset = args_to_push;
              args_to_push = 0;

              /* Now reuse r3 to preserve IP.  */
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
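
  /* Illustrative note (not from the source): for ARG bytes of pretend
     arguments, (0xf0 >> (ARG / 4)) & 0xf always selects the highest
     argument registers:
         ARG = 4  -> 0x8  (push {r3})
         ARG = 8  -> 0xc  (push {r2, r3})
         ARG = 12 -> 0xe  (push {r1-r3})
         ARG = 16 -> 0xf  (push {r0-r3})  */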
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }

  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done is a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;

          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!df_regs_ever_live_p (3)
                  || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else /* if (crtl->args.pretend_args_size == 0) */
                {
                  insn = plus_constant (hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_prologue_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
        {
          fputc('s', stream);
          arm_print_condition (stream);
        }
      else
        {
          arm_print_condition (stream);
          fputc('s', stream);
        }
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence. It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
        r = real_value_negate (&r);
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case CONST:
        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        default:
          gcc_unreachable ();
        }
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
          return;
        }
      arm_print_operand (stream, x, 0);
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        if (!shift_operator (x, SImode))
          {
            output_operand_lossage ("invalid shift operand");
            break;
          }

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
        {
          enum machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   GET_CODE (XEXP (x, 0)) == REG
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    /* Cirrus registers can be accessed in a variety of ways:
         single floating point (f)
         double floating point (d)
         32bit integer         (fx)
         64bit integer         (dx).  */
    case 'W':                   /* Cirrus register in F mode.  */
    case 'X':                   /* Cirrus register in D mode.  */
    case 'Y':                   /* Cirrus register in FX mode.  */
    case 'Z':                   /* Cirrus register in DX mode.  */
      gcc_assert (GET_CODE (x) == REG
                  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);

      fprintf (stream, "mv%s%s",
               code == 'W' ? "f"
               : code == 'X' ? "d"
               : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);

      return;

    /* Print cirrus register in the mode specified by the register's mode.  */
    case 'V':
      {
        int mode = GET_MODE (x);

        if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "mv%s%s",
                 mode == DFmode ? "d"
                 : mode == SImode ? "fx"
                 : mode == DImode ? "dx"
                 : "f", reg_names[REGNO (x)] + 2);
      }
      return;

    case 'U':
      if (GET_CODE (x) != REG
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (GET_CODE (x) != CONST_INT
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fprintf (stream, wc_reg_names [INTVAL (x)]);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        int mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_CODE (x) != REG
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        int mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                   + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                   + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;
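
    /* Illustrative note (not from the source): the three cases above pick
       the type letter by indexing a 4-character string with the low two
       feature bits, i.e. bits 0x0 -> 'u'/'i'/'u', 0x1 -> 's'/'i'/'s',
       0x2 -> 'p'/'p'/'u' and 0x3 -> 'f'/'f'/'f' for %T/%F/%t
       respectively.  */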
    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        unsigned align, modesize, align_bits;

        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        modesize = GET_MODE_SIZE (GET_MODE (x));

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (modesize == 16 && (align % 32) == 0)
          align_bits = 256;
        else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
          align_bits = 128;
        else if ((align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
      }
      return;
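
    /* Illustrative note (not from the source): the hint selection above
       maps (access size, byte alignment) to the bit-alignment suffix of
       the vld1/vst1 address, e.g. a 16-byte access that is 32-byte
       aligned prints "[rN:256]", an 8- or 16-byte access that is 16-byte
       aligned prints "[rN:128]", anything 8-byte aligned prints
       "[rN:64]", and other alignments print no hint at all.  */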
    case 'C':
      {
        rtx addr;

        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        gcc_assert (GET_CODE (addr) == REG);
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          if (TARGET_NEON)
            {
              char fpstr[20];
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                               sizeof (fpstr), 0, 1);
              fprintf (stream, "#%s", fpstr);
            }
          else
            fprintf (stream, "#%s", fp_immediate_constant (x));
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (GET_CODE (x) == REG)
        asm_fprintf (stream, "[%r, #0]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (GET_CODE (base) != REG
              || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as in index register.  */
              rtx temp = base;
              base = index;
              index = temp;
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          extern enum machine_mode output_memory_reference_mode;

          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (GET_CODE (x) == REG)
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (TARGET_VXWORKS_RTP
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
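
/* Illustrative sketch (not part of GCC): the states and transitions
   described above, written out as an enum.  The names are hypothetical;
   the real code keeps the state as the plain integer arm_ccfsm_state.  */
#if 0
enum ccfsm_state
{
  CCFSM_NORMAL = 0,      /* 0: normal, do nothing special */
  CCFSM_SKIP_TO_LABEL,   /* 1: suppress the branch over a label */
  CCFSM_SKIP_TO_BRANCH,  /* 2: suppress the branch over an unconditional
                            branch */
  CCFSM_COND_TO_LABEL,   /* 3: conditionalize insns up to the label */
  CCFSM_COND_TO_BRANCH   /* 4: conditionalize insns up to the branch */
};

/* 0->1 and 0->2 are made by final_prescan_insn; 1->3 and 2->4 by
   ASM_OUTPUT_OPCODE once the branch has been suppressed; 3->0 when the
   target label is emitted; 4->0 when the target branch is reached.  */
#endif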
/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      return code;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: gcc_unreachable ();
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: gcc_unreachable ();
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: gcc_unreachable ();
        }

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
         byte.  This allows us to handle all cases except UNEQ and
         LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: gcc_unreachable ();
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: gcc_unreachable ();
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    default: gcc_unreachable ();
    }
}
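
/* Illustrative note (an assumption based on this port's arm.h, where
   ARM_INVERSE_CONDITION_CODE is defined as an XOR with 1): the ARM
   condition codes pair up so that inverting a condition flips the low
   bit of its encoding.  */
#if 0
static int
inverse_cond_sketch (int cond)
{
  return cond ^ 1;   /* e.g. EQ <-> NE when the codes are paired */
}
#endif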
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE(insn) == JUMP_INSN)
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
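
/* Illustrative sketch (not part of GCC): growing an IT block.  This is
   simplified to one mask bit per queued insn (the real code above adds
   get_attr_ce_count bits at a time); a set bit means "runs under the
   block condition", a clear bit means "runs under the inverse".  */
#if 0
#include <stdbool.h>

struct it_block { int cc; unsigned mask; int len; };

static bool
it_block_add_sketch (struct it_block *b, int insn_cc, int inverse_cc)
{
  if (b->len >= 4)
    return false;                /* at most 4 insns per IT block */
  if (insn_cc == b->cc)
    b->mask |= 1u << b->len;     /* a 't' slot */
  else if (insn_cc != inverse_cc)
    return false;                /* neither cc nor its inverse: stop */
  b->len++;                      /* a clear bit is kept for an 'e' slot */
  return true;
}
#endif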
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (GET_CODE (body) == RETURN)
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes
     the jump should always come first */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
        seeking_return = 1;
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
        {
          seeking_return = 1;
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && GET_CODE (this_insn) == BARRIER)
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == RETURN
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == RETURN
                       && seeking_return)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;    /* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;

              /* A conditional cirrus instruction must be followed by
                 a non Cirrus instruction.  However, since we
                 conditionalize instructions in this function and by
                 the time we get here we can't add instructions
                 (nops), because shorten_branches() has already been
                 called, we will disable conditionalizing Cirrus
                 instructions to be safe.  */
              if (GET_CODE (scanbody) != USE
                  && GET_CODE (scanbody) != CLOBBER
                  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
                fail = TRUE;
              break;

            default:
              break;
            }
        }

      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (GET_CODE (this_insn) != BARRIER
                                  && GET_CODE (this_insn) != CODE_LABEL));
                }
              if (!this_insn)
                {
                  /* Oh, dear! we ran off the end.. give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
                  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
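
/* Illustrative sketch (not part of GCC): rendering the mnemonic from the
   mask/length pair, as the loop above does.  Bit 0 is the first insn and
   is always set, so the leading "i" plus buff[0] spells "it".  */
#if 0
#include <stdio.h>

static void
print_it_sketch (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;

  for (n = 0; n < len && n < 4; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

/* e.g. print_it_sketch (0xb, 4, "eq") prints "ittet eq".  */
#endif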
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably we ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64bits-- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
          && GET_MODE_CLASS (mode) == MODE_FLOAT
          && regno >= FIRST_FPA_REGNUM
          && regno <= LAST_FPA_REGNUM);
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_CIRRUS_REGNUM (regno))
    return CIRRUS_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computing by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
          && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
          )
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

          break;
        }
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
#define def_mbuiltin(MASK, NAME, TYPE, CODE)                            \
  do                                                                    \
    {                                                                   \
      if ((MASK) & insn_flags)                                          \
        add_builtin_function ((NAME), (TYPE), (CODE),                   \
                              BUILT_IN_MD, NULL, NULL_TREE);            \
    }                                                                   \
  while (0)
struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
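
/* For example, the first entry below,
       IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
         ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   tying the __builtin_arm_waddb builtin to the addv8qi3 insn pattern.  */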
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
  IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di, WRORD)
  IWMMXT_BUILTIN2 (rordi3, WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
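
/* Illustration: the first IWMMXT_BUILTIN2 entry above expands to

     { FL_IWMMXT, CODE_FOR_iwmmxt_wpackhss, NULL, ARM_BUILTIN_WPACKHSS,
       UNKNOWN, 0 },

   i.e. a builtin_description with a NULL name.  The name-driven
   registration loop in arm_init_iwmmxt_builtins skips such entries; they
   are registered there explicitly with hand-written function types.  */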
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree int_ftype_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));

  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE,
                                      long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            V4HI_type_node,
                                                            endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
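
/* Usage sketch (illustrative, assuming an iWMMXt-enabled target such as
   -mcpu=iwmmxt; the typedef below is hypothetical user code):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add_halfwords (v4hi a, v4hi b)
     {
       return __builtin_arm_waddh (a, b);
     }

   The mmintrin.h intrinsics are thin wrappers around these builtins.  */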
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
                               ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
                               "__builtin_thread_pointer", NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
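
/* Illustrative use (hypothetical user code):

     void *tls_base (void) { return __builtin_thread_pointer (); }

   This expands to a read of the thread pointer; since the builtin is
   marked nothrow and readonly, repeated calls can be CSEd.  */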
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

#define T_MAX 13
typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
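
/* For illustration, VAR2 (BINOP, vqdmull, v4hi, v2si) expands via UP and
   CF to the initializer

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. a single neon_builtin_datum covering both element widths of the
   instruction.  */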
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
          vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
          vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};
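
/* Taking the first entry above as an example: the VAR10 record for vadd
   yields ten builtins, one per listed mode.  Registration (below) appends
   the mode name to the base name, so the v8qi variant becomes
   __builtin_neon_vaddv8qi and the v2di variant __builtin_neon_vaddv2di;
   arm_neon.h maps intrinsics such as vadd_s8 onto these names.  */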
#undef CF
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];

  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
                                             "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                             "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                             "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
                                             "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
                                             "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
                                             "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
                                             "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
                                           TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
                                           TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
                                           TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
                                           TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
                                           TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);

  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);

  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
                                             "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
                                             "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
                                             "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
                                             "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
                                             "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
                                             "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
                                             "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
                              V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
                              V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
                              V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
                              V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
                              V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
                              V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
                              V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
                              V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
                              V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
        {
          reinterp_ftype_dreg[i][j]
            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
          reinterp_ftype_qreg[i][j]
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
        }
    }

  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
        {
          const char* const modenames[] = {
            "v8qi", "v4hi", "v2si", "v2sf", "di",
            "v16qi", "v8hi", "v4si", "v4sf", "v2di"
          };
          char namebuf[60];
          tree ftype = NULL;
          enum insn_code icode;
          int is_load = 0, is_store = 0;

          if ((d->bits & (1 << j)) == 0)
            continue;

          icode = d->codes[codeidx++];

          switch (d->itype)
            {
            case NEON_LOAD1:
            case NEON_LOAD1LANE:
            case NEON_LOADSTRUCT:
            case NEON_LOADSTRUCTLANE:
              is_load = 1;
              /* Fall through.  */
            case NEON_STORE1:
            case NEON_STORE1LANE:
            case NEON_STORESTRUCT:
            case NEON_STORESTRUCTLANE:
              if (!is_load)
                is_store = 1;
              /* Fall through.  */
            case NEON_UNOP:
            case NEON_BINOP:
            case NEON_LOGICBINOP:
            case NEON_SHIFTINSERT:
            case NEON_TERNOP:
            case NEON_GETLANE:
            case NEON_SETLANE:
            case NEON_CREATE:
            case NEON_DUP:
            case NEON_DUPLANE:
            case NEON_SHIFTIMM:
            case NEON_SHIFTACC:
            case NEON_COMBINE:
            case NEON_SPLIT:
            case NEON_CONVERT:
            case NEON_FIXCONV:
            case NEON_LANEMUL:
            case NEON_LANEMULL:
            case NEON_LANEMULH:
            case NEON_LANEMAC:
            case NEON_SCALARMUL:
            case NEON_SCALARMULL:
            case NEON_SCALARMULH:
            case NEON_SCALARMAC:
            case NEON_SELECT:
            case NEON_VTBL:
            case NEON_VTBX:
              {
                int k;
                tree return_type = void_type_node, args = void_list_node;

                /* Build a function type directly from the insn_data for this
                   builtin.  The build_function_type () function takes care of
                   removing duplicates for us.  */
                for (k = insn_data[icode].n_generator_args - 1; k >= 0; k--)
                  {
                    tree eltype;

                    if (is_load && k == 1)
                      {
                        /* Neon load patterns always have the memory operand
                           (a SImode pointer) in the operand 1 position.  We
                           want a const pointer to the element type in that
                           position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = const_intQI_pointer_node;
                            break;

                          case T_V4HI:
                          case T_V8HI:
                            eltype = const_intHI_pointer_node;
                            break;

                          case T_V2SI:
                          case T_V4SI:
                            eltype = const_intSI_pointer_node;
                            break;

                          case T_V2SF:
                          case T_V4SF:
                            eltype = const_float_pointer_node;
                            break;

                          case T_DI:
                          case T_V2DI:
                            eltype = const_intDI_pointer_node;
                            break;

                          default: gcc_unreachable ();
                          }
                      }
                    else if (is_store && k == 0)
                      {
                        /* Similarly, Neon store patterns use operand 0 as
                           the memory location to store to (a SImode pointer).
                           Use a pointer to the element type of the store in
                           that position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = intQI_pointer_node;
                            break;

                          case T_V4HI:
                          case T_V8HI:
                            eltype = intHI_pointer_node;
                            break;

                          case T_V2SI:
                          case T_V4SI:
                            eltype = intSI_pointer_node;
                            break;

                          case T_V2SF:
                          case T_V4SF:
                            eltype = float_pointer_node;
                            break;

                          case T_DI:
                          case T_V2DI:
                            eltype = intDI_pointer_node;
                            break;

                          default: gcc_unreachable ();
                          }
                      }
                    else
                      {
                        switch (insn_data[icode].operand[k].mode)
                          {
                          case VOIDmode: eltype = void_type_node; break;
                          /* Scalars.  */
                          case QImode: eltype = neon_intQI_type_node; break;
                          case HImode: eltype = neon_intHI_type_node; break;
                          case SImode: eltype = neon_intSI_type_node; break;
                          case SFmode: eltype = neon_float_type_node; break;
                          case DImode: eltype = neon_intDI_type_node; break;
                          case TImode: eltype = intTI_type_node; break;
                          case EImode: eltype = intEI_type_node; break;
                          case OImode: eltype = intOI_type_node; break;
                          case CImode: eltype = intCI_type_node; break;
                          case XImode: eltype = intXI_type_node; break;
                          /* 64-bit vectors.  */
                          case V8QImode: eltype = V8QI_type_node; break;
                          case V4HImode: eltype = V4HI_type_node; break;
                          case V2SImode: eltype = V2SI_type_node; break;
                          case V2SFmode: eltype = V2SF_type_node; break;
                          /* 128-bit vectors.  */
                          case V16QImode: eltype = V16QI_type_node; break;
                          case V8HImode: eltype = V8HI_type_node; break;
                          case V4SImode: eltype = V4SI_type_node; break;
                          case V4SFmode: eltype = V4SF_type_node; break;
                          case V2DImode: eltype = V2DI_type_node; break;
                          default: gcc_unreachable ();
                          }
                      }

                    if (k == 0 && !is_store)
                      return_type = eltype;
                    else
                      args = tree_cons (NULL_TREE, eltype, args);
                  }

                ftype = build_function_type (return_type, args);
              }
              break;
            case NEON_RESULTPAIR:
              {
                switch (insn_data[icode].operand[1].mode)
                  {
                  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
                  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
                  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
                  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
                  case DImode: ftype = void_ftype_pdi_di_di; break;
                  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
                  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
                  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
                  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
                  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
                  default: gcc_unreachable ();
                  }
              }
              break;
            case NEON_REINTERP:
              {
                /* We iterate over 5 doubleword types, then 5 quadword
                   types.  */
                int rhs = j % 5;
                switch (insn_data[icode].operand[0].mode)
                  {
                  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
                  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
                  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
                  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
                  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
                  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
                  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
                  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
                  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
                  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
                  default: gcc_unreachable ();
                  }
              }
              break;

            default:
              gcc_unreachable ();
            }
          gcc_assert (ftype != NULL);

          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

          add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
                                NULL_TREE);
        }
    }
}
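
/* Note: the loop above hands out function codes sequentially from
   ARM_BUILTIN_NEON_BASE and records the first code of each table entry in
   d->base_fcode, so every entry owns a contiguous code range.
   locate_neon_builtin_icode below relies on that invariant to map a
   function code back to its insn code with a binary search.  */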
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
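
/* Illustrative use: once a half-precision format is selected (e.g.
   -mfp16-format=ieee), __fp16 is available as a storage type:

     __fp16 h = 1.0f;   // stored in 16 bits; arithmetic happens in float

   The promotion to float is implemented by arm_promoted_type below.  */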
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
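
/* For example, both of these (hypothetical) declarations are rejected
   with the messages above:

     __fp16 bad_return (void);   // functions cannot return __fp16 type
     void bad_param (__fp16 x);  // function parameters cannot have
                                 // __fp16 type
*/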
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
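
/* E.g. a cast from double to __fp16 (hypothetical user code)

     __fp16 narrow (double d) { return (__fp16) d; }

   is rewritten as double -> float -> __fp16.  Conversions between __fp16
   and float need no intermediate step, so the hook returns NULL_TREE and
   the default handling applies.  */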
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
                               : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
                         tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}

static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key
    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
  neon_builtin_datum *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
             sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
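
/* Lookup sketch (hypothetical numbers): if the vadd entry was assigned
   base_fcode F with num_vars == 10, a query for fcode == F + 2 bsearches
   neon_builtin_data for the entry whose [base_fcode, base_fcode +
   num_vars) range contains the code, sets *itype to NEON_BINOP and
   returns the third element of its codes[] array.  */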
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
                      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
        break;
      else
        {
          arg[argc] = CALL_EXPR_ARG (exp, argc);
          op[argc] = expand_normal (arg[argc]);
          mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

          switch (thisarg)
            {
            case NEON_ARG_COPY_TO_REG:
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                     (op[argc], mode[argc]))
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
              break;

            case NEON_ARG_CONSTANT:
              /* FIXME: This error message is somewhat unhelpful.  */
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                    (op[argc], mode[argc]))
                error ("argument must be a constant");
              break;

            case NEON_ARG_STOP:
              gcc_unreachable ();
            }

          argc++;
        }
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (target, op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (target, op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */

void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
19531 /* Emit code to place a Neon pair result in memory locations (with equal
19534 neon_emit_pair_result_insn (enum machine_mode mode,
19535 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19538 rtx mem = gen_rtx_MEM (mode, destaddr);
19539 rtx tmp1 = gen_reg_rtx (mode);
19540 rtx tmp2 = gen_reg_rtx (mode);
19542 emit_insn (intfn (tmp1, op1, tmp2, op2));
19544 emit_move_insn (mem, tmp1);
19545 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19546 emit_move_insn (mem, tmp2);
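/* The two halves land in consecutive memory: TMP1 at DESTADDR and TMP2 at
   DESTADDR + GET_MODE_SIZE (MODE).  */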
19549 /* Set up operands for a register copy from src to dest, taking care not to
19550 clobber registers in the process.
19551 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19552 be called with a large N, so that should be OK. */
19555 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19557 unsigned int copied = 0, opctr = 0;
19558 unsigned int done = (1 << count) - 1;
19561 while (copied != done)
19563 for (i = 0; i < count; i++)
19567 for (j = 0; good && j < count; j++)
19568 if (i != j && (copied & (1 << j)) == 0
19569 && reg_overlap_mentioned_p (src[j], dest[i]))
19574 operands[opctr++] = dest[i];
19575 operands[opctr++] = src[i];
19581 gcc_assert (opctr == count * 2);
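/* For example, with DEST = {d1, d2} and SRC = {d0, d1}, the d2 := d1 copy
   is ordered first so that d1 is read before it is overwritten by the
   d1 := d0 copy.  */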
19584 /* Expand an expression EXP that calls a built-in function,
19585 with result going to TARGET if that's convenient
19586 (and in mode MODE if that's convenient).
19587 SUBTARGET may be used as the target for computing one of EXP's operands.
19588 IGNORE is nonzero if the value is to be ignored. */
19591 arm_expand_builtin (tree exp,
19593 rtx subtarget ATTRIBUTE_UNUSED,
19594 enum machine_mode mode ATTRIBUTE_UNUSED,
19595 int ignore ATTRIBUTE_UNUSED)
19597 const struct builtin_description * d;
19598 enum insn_code icode;
19599 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19607 int fcode = DECL_FUNCTION_CODE (fndecl);
19609 enum machine_mode tmode;
19610 enum machine_mode mode0;
19611 enum machine_mode mode1;
19612 enum machine_mode mode2;
19614 if (fcode >= ARM_BUILTIN_NEON_BASE)
19615 return arm_expand_neon_builtin (fcode, exp, target);
19619 case ARM_BUILTIN_TEXTRMSB:
19620 case ARM_BUILTIN_TEXTRMUB:
19621 case ARM_BUILTIN_TEXTRMSH:
19622 case ARM_BUILTIN_TEXTRMUH:
19623 case ARM_BUILTIN_TEXTRMSW:
19624 case ARM_BUILTIN_TEXTRMUW:
19625 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19626 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19627 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19628 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19629 : CODE_FOR_iwmmxt_textrmw);
19631 arg0 = CALL_EXPR_ARG (exp, 0);
19632 arg1 = CALL_EXPR_ARG (exp, 1);
19633 op0 = expand_normal (arg0);
19634 op1 = expand_normal (arg1);
19635 tmode = insn_data[icode].operand[0].mode;
19636 mode0 = insn_data[icode].operand[1].mode;
19637 mode1 = insn_data[icode].operand[2].mode;
19639 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19640 op0 = copy_to_mode_reg (mode0, op0);
19641 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19643 /* @@@ better error message */
19644 error ("selector must be an immediate");
19645 return gen_reg_rtx (tmode);
19648 || GET_MODE (target) != tmode
19649 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19650 target = gen_reg_rtx (tmode);
19651 pat = GEN_FCN (icode) (target, op0, op1);
19657 case ARM_BUILTIN_TINSRB:
19658 case ARM_BUILTIN_TINSRH:
19659 case ARM_BUILTIN_TINSRW:
19660 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19661 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19662 : CODE_FOR_iwmmxt_tinsrw);
19663 arg0 = CALL_EXPR_ARG (exp, 0);
19664 arg1 = CALL_EXPR_ARG (exp, 1);
19665 arg2 = CALL_EXPR_ARG (exp, 2);
19666 op0 = expand_normal (arg0);
19667 op1 = expand_normal (arg1);
19668 op2 = expand_normal (arg2);
19669 tmode = insn_data[icode].operand[0].mode;
19670 mode0 = insn_data[icode].operand[1].mode;
19671 mode1 = insn_data[icode].operand[2].mode;
19672 mode2 = insn_data[icode].operand[3].mode;
19674 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19675 op0 = copy_to_mode_reg (mode0, op0);
19676 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19677 op1 = copy_to_mode_reg (mode1, op1);
19678 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19680 /* @@@ better error message */
19681 error ("selector must be an immediate");
19685 || GET_MODE (target) != tmode
19686 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19687 target = gen_reg_rtx (tmode);
19688 pat = GEN_FCN (icode) (target, op0, op1, op2);
19694 case ARM_BUILTIN_SETWCX:
19695 arg0 = CALL_EXPR_ARG (exp, 0);
19696 arg1 = CALL_EXPR_ARG (exp, 1);
19697 op0 = force_reg (SImode, expand_normal (arg0));
19698 op1 = expand_normal (arg1);
19699 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19702 case ARM_BUILTIN_GETWCX:
19703 arg0 = CALL_EXPR_ARG (exp, 0);
19704 op0 = expand_normal (arg0);
19705 target = gen_reg_rtx (SImode);
19706 emit_insn (gen_iwmmxt_tmrc (target, op0));
19709 case ARM_BUILTIN_WSHUFH:
19710 icode = CODE_FOR_iwmmxt_wshufh;
19711 arg0 = CALL_EXPR_ARG (exp, 0);
19712 arg1 = CALL_EXPR_ARG (exp, 1);
19713 op0 = expand_normal (arg0);
19714 op1 = expand_normal (arg1);
19715 tmode = insn_data[icode].operand[0].mode;
19716 mode1 = insn_data[icode].operand[1].mode;
19717 mode2 = insn_data[icode].operand[2].mode;
19719 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19720 op0 = copy_to_mode_reg (mode1, op0);
19721 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19723 /* @@@ better error message */
19724 error ("mask must be an immediate");
19728 || GET_MODE (target) != tmode
19729 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19730 target = gen_reg_rtx (tmode);
19731 pat = GEN_FCN (icode) (target, op0, op1);
19737 case ARM_BUILTIN_WSADB:
19738 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19739 case ARM_BUILTIN_WSADH:
19740 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19741 case ARM_BUILTIN_WSADBZ:
19742 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19743 case ARM_BUILTIN_WSADHZ:
19744 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19746 /* Several three-argument builtins. */
19747 case ARM_BUILTIN_WMACS:
19748 case ARM_BUILTIN_WMACU:
19749 case ARM_BUILTIN_WALIGN:
19750 case ARM_BUILTIN_TMIA:
19751 case ARM_BUILTIN_TMIAPH:
19752 case ARM_BUILTIN_TMIATT:
19753 case ARM_BUILTIN_TMIATB:
19754 case ARM_BUILTIN_TMIABT:
19755 case ARM_BUILTIN_TMIABB:
19756 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19757 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19758 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19759 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19760 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19761 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19762 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19763 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19764 : CODE_FOR_iwmmxt_walign);
19765 arg0 = CALL_EXPR_ARG (exp, 0);
19766 arg1 = CALL_EXPR_ARG (exp, 1);
19767 arg2 = CALL_EXPR_ARG (exp, 2);
19768 op0 = expand_normal (arg0);
19769 op1 = expand_normal (arg1);
19770 op2 = expand_normal (arg2);
19771 tmode = insn_data[icode].operand[0].mode;
19772 mode0 = insn_data[icode].operand[1].mode;
19773 mode1 = insn_data[icode].operand[2].mode;
19774 mode2 = insn_data[icode].operand[3].mode;
19776 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19777 op0 = copy_to_mode_reg (mode0, op0);
19778 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19779 op1 = copy_to_mode_reg (mode1, op1);
19780 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19781 op2 = copy_to_mode_reg (mode2, op2);
19783 || GET_MODE (target) != tmode
19784 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19785 target = gen_reg_rtx (tmode);
19786 pat = GEN_FCN (icode) (target, op0, op1, op2);
19792 case ARM_BUILTIN_WZERO:
19793 target = gen_reg_rtx (DImode);
19794 emit_insn (gen_iwmmxt_clrdi (target));
19797 case ARM_BUILTIN_THREAD_POINTER:
19798 return arm_load_tp (target);
19804 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19805 if (d->code == (const enum arm_builtins) fcode)
19806 return arm_expand_binop_builtin (d->icode, exp, target);
19808 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19809 if (d->code == (const enum arm_builtins) fcode)
19810 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19812 /* @@@ Should really do something sensible here. */
19816 /* Return the number (counting from 0) of
19817 the least significant set bit in MASK. */
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
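/* For example, number_of_first_bit_set (0x14) returns 2, since bit 2 is
   the lowest bit set in binary 10100.  */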
19832 /* Emit code to push or pop registers to or from the stack. F is the
19833 assembly file. MASK is the registers to push or pop. PUSH is
19834 nonzero if we should push, and zero if we should pop. For debugging
19835 output, if pushing, adjust CFA_OFFSET by the amount of space added
19836 to the stack. REAL_REGS should have the same number of bits set as
19837 MASK, and will be used instead (in the same order) to describe which
19838 registers were saved - this is used to mark the save slots when we
19839 push high registers after moving them to low registers. */
19841 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19842 unsigned long real_regs)
19845 int lo_mask = mask & 0xFF;
19846 int pushed_words = 0;
19850 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
/* Special case.  Do not generate a POP PC statement here, do it in
   thumb_exit.  */
19854 thumb_exit (f, -1);
19858 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19860 fprintf (f, "\t.save\t{");
19861 for (regno = 0; regno < 15; regno++)
19863 if (real_regs & (1 << regno))
19865 if (real_regs & ((1 << regno) -1))
19867 asm_fprintf (f, "%r", regno);
19870 fprintf (f, "}\n");
19873 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19875 /* Look at the low registers first. */
19876 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19880 asm_fprintf (f, "%r", regno);
19882 if ((lo_mask & ~1) != 0)
19889 if (push && (mask & (1 << LR_REGNUM)))
19891 /* Catch pushing the LR. */
19895 asm_fprintf (f, "%r", LR_REGNUM);
19899 else if (!push && (mask & (1 << PC_REGNUM)))
19901 /* Catch popping the PC. */
19902 if (TARGET_INTERWORK || TARGET_BACKTRACE
19903 || crtl->calls_eh_return)
/* The PC is never popped directly, instead
   it is popped into r3 and then BX is used.  */
19907 fprintf (f, "}\n");
19909 thumb_exit (f, -1);
19918 asm_fprintf (f, "%r", PC_REGNUM);
19922 fprintf (f, "}\n");
19924 if (push && pushed_words && dwarf2out_do_frame ())
19926 char *l = dwarf2out_cfi_label (false);
19927 int pushed_mask = real_regs;
19929 *cfa_offset += pushed_words * 4;
19930 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19933 pushed_mask = real_regs;
19934 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19936 if (pushed_mask & 1)
19937 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
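/* For example, a push with MASK == 0x4003 (bits 0, 1 and 14 set) comes
   out as "push {r0, r1, lr}".  */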
19942 /* Generate code to return from a thumb function.
19943 If 'reg_containing_return_addr' is -1, then the return address is
19944 actually on the stack, at the stack pointer. */
19946 thumb_exit (FILE *f, int reg_containing_return_addr)
19948 unsigned regs_available_for_popping;
19949 unsigned regs_to_pop;
19951 unsigned available;
19955 int restore_a4 = FALSE;
19957 /* Compute the registers we need to pop. */
19961 if (reg_containing_return_addr == -1)
19963 regs_to_pop |= 1 << LR_REGNUM;
19967 if (TARGET_BACKTRACE)
19969 /* Restore the (ARM) frame pointer and stack pointer. */
19970 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19974 /* If there is nothing to pop then just emit the BX instruction and
19976 if (pops_needed == 0)
19978 if (crtl->calls_eh_return)
19979 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19981 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19984 /* Otherwise if we are not supporting interworking and we have not created
19985 a backtrace structure and the function was not entered in ARM mode then
19986 just pop the return address straight into the PC. */
19987 else if (!TARGET_INTERWORK
19988 && !TARGET_BACKTRACE
19989 && !is_called_in_ARM_mode (current_function_decl)
19990 && !crtl->calls_eh_return)
19992 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19996 /* Find out how many of the (return) argument registers we can corrupt. */
19997 regs_available_for_popping = 0;
19999 /* If returning via __builtin_eh_return, the bottom three registers
20000 all contain information needed for the return. */
20001 if (crtl->calls_eh_return)
/* We can deduce the registers used from the function's
   return value.  This is more reliable than examining
   df_regs_ever_live_p () because that will be set if the register is
   ever used in the function, not just if the register is used
   to hold a return value.  */
20011 if (crtl->return_rtx != 0)
20012 mode = GET_MODE (crtl->return_rtx);
20014 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20016 size = GET_MODE_SIZE (mode);
20020 /* In a void function we can use any argument register.
20021 In a function that returns a structure on the stack
20022 we can use the second and third argument registers. */
20023 if (mode == VOIDmode)
20024 regs_available_for_popping =
20025 (1 << ARG_REGISTER (1))
20026 | (1 << ARG_REGISTER (2))
20027 | (1 << ARG_REGISTER (3));
20029 regs_available_for_popping =
20030 (1 << ARG_REGISTER (2))
20031 | (1 << ARG_REGISTER (3));
20033 else if (size <= 4)
20034 regs_available_for_popping =
20035 (1 << ARG_REGISTER (2))
20036 | (1 << ARG_REGISTER (3));
20037 else if (size <= 8)
20038 regs_available_for_popping =
20039 (1 << ARG_REGISTER (3));
20042 /* Match registers to be popped with registers into which we pop them. */
20043 for (available = regs_available_for_popping,
20044 required = regs_to_pop;
20045 required != 0 && available != 0;
20046 available &= ~(available & - available),
20047 required &= ~(required & - required))
20050 /* If we have any popping registers left over, remove them. */
20052 regs_available_for_popping &= ~available;
20054 /* Otherwise if we need another popping register we can use
20055 the fourth argument register. */
20056 else if (pops_needed)
20058 /* If we have not found any free argument registers and
20059 reg a4 contains the return address, we must move it. */
20060 if (regs_available_for_popping == 0
20061 && reg_containing_return_addr == LAST_ARG_REGNUM)
20063 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20064 reg_containing_return_addr = LR_REGNUM;
20066 else if (size > 12)
20068 /* Register a4 is being used to hold part of the return value,
20069 but we have dire need of a free, low register. */
20072 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20075 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20077 /* The fourth argument register is available. */
20078 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20084 /* Pop as many registers as we can. */
20085 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20086 regs_available_for_popping);
20088 /* Process the registers we popped. */
20089 if (reg_containing_return_addr == -1)
20091 /* The return address was popped into the lowest numbered register. */
20092 regs_to_pop &= ~(1 << LR_REGNUM);
20094 reg_containing_return_addr =
20095 number_of_first_bit_set (regs_available_for_popping);
/* Remove this register from the mask of available registers, so that
   the return address will not be corrupted by further pops.  */
20099 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20102 /* If we popped other registers then handle them here. */
20103 if (regs_available_for_popping)
20107 /* Work out which register currently contains the frame pointer. */
20108 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20110 /* Move it into the correct place. */
20111 asm_fprintf (f, "\tmov\t%r, %r\n",
20112 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20114 /* (Temporarily) remove it from the mask of popped registers. */
20115 regs_available_for_popping &= ~(1 << frame_pointer);
20116 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20118 if (regs_available_for_popping)
20122 /* We popped the stack pointer as well,
20123 find the register that contains it. */
20124 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20126 /* Move it into the stack register. */
20127 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20129 /* At this point we have popped all necessary registers, so
20130 do not worry about restoring regs_available_for_popping
20131 to its correct value:
20133 assert (pops_needed == 0)
20134 assert (regs_available_for_popping == (1 << frame_pointer))
20135 assert (regs_to_pop == (1 << STACK_POINTER)) */
/* Since we have just moved the popped value into the frame
   pointer, the popping register is available for reuse, and
   we know that we still have the stack pointer left to pop.  */
20142 regs_available_for_popping |= (1 << frame_pointer);
/* If we still have registers left on the stack, but we no longer have
   any registers into which we can pop them, then we must move the return
   address into the link register and make available the register that
   held it.  */
20150 if (regs_available_for_popping == 0 && pops_needed > 0)
20152 regs_available_for_popping |= 1 << reg_containing_return_addr;
20154 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20155 reg_containing_return_addr);
20157 reg_containing_return_addr = LR_REGNUM;
20160 /* If we have registers left on the stack then pop some more.
20161 We know that at most we will want to pop FP and SP. */
20162 if (pops_needed > 0)
20167 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20168 regs_available_for_popping);
20170 /* We have popped either FP or SP.
20171 Move whichever one it is into the correct register. */
20172 popped_into = number_of_first_bit_set (regs_available_for_popping);
20173 move_to = number_of_first_bit_set (regs_to_pop);
20175 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20177 regs_to_pop &= ~(1 << move_to);
20182 /* If we still have not popped everything then we must have only
20183 had one register available to us and we are now popping the SP. */
20184 if (pops_needed > 0)
20188 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20189 regs_available_for_popping);
20191 popped_into = number_of_first_bit_set (regs_available_for_popping);
20193 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20195 assert (regs_to_pop == (1 << STACK_POINTER))
20196 assert (pops_needed == 1)
20200 /* If necessary restore the a4 register. */
20203 if (reg_containing_return_addr != LR_REGNUM)
20205 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20206 reg_containing_return_addr = LR_REGNUM;
20209 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20212 if (crtl->calls_eh_return)
20213 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20215 /* Return to caller. */
20216 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20219 /* Scan INSN just before assembler is output for it.
20220 For Thumb-1, we track the status of the condition codes; this
20221 information is used in the cbranchsi4_insn pattern. */
20223 thumb1_final_prescan_insn (rtx insn)
20225 if (flag_print_asm_name)
20226 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20227 INSN_ADDRESSES (INSN_UID (insn)));
20228 /* Don't overwrite the previous setter when we get to a cbranch. */
20229 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20231 enum attr_conds conds;
20233 if (cfun->machine->thumb1_cc_insn)
20235 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20236 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20239 conds = get_attr_conds (insn);
20240 if (conds == CONDS_SET)
20242 rtx set = single_set (insn);
20243 cfun->machine->thumb1_cc_insn = insn;
20244 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20245 cfun->machine->thumb1_cc_op1 = const0_rtx;
20246 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20247 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20249 rtx src1 = XEXP (SET_SRC (set), 1);
20250 if (src1 == const0_rtx)
20251 cfun->machine->thumb1_cc_mode = CCmode;
20254 else if (conds != CONDS_NOCOND)
20255 cfun->machine->thumb1_cc_insn = NULL_RTX;
20260 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20262 unsigned HOST_WIDE_INT mask = 0xff;
20265 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20266 if (val == 0) /* XXX */
20269 for (i = 0; i < 25; i++)
20270 if ((val & (mask << i)) == val)
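/* For example, 0x00ff0000 matches with I == 16, whereas 0x80000001 can
   never match: its set bits do not fit within any eight contiguous bit
   positions.  */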
20276 /* Returns nonzero if the current function contains,
20277 or might contain a far jump. */
20279 thumb_far_jump_used_p (void)
20283 /* This test is only important for leaf functions. */
20284 /* assert (!leaf_function_p ()); */
20286 /* If we have already decided that far jumps may be used,
20287 do not bother checking again, and always return true even if
20288 it turns out that they are not being used. Once we have made
20289 the decision that far jumps are present (and that hence the link
20290 register will be pushed onto the stack) we cannot go back on it. */
20291 if (cfun->machine->far_jump_used)
20294 /* If this function is not being called from the prologue/epilogue
20295 generation code then it must be being called from the
20296 INITIAL_ELIMINATION_OFFSET macro. */
20297 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20299 /* In this case we know that we are being asked about the elimination
20300 of the arg pointer register. If that register is not being used,
20301 then there are no arguments on the stack, and we do not have to
20302 worry that a far jump might force the prologue to push the link
20303 register, changing the stack offsets. In this case we can just
20304 return false, since the presence of far jumps in the function will
20305 not affect stack offsets.
20307 If the arg pointer is live (or if it was live, but has now been
20308 eliminated and so set to dead) then we do have to test to see if
20309 the function might contain a far jump. This test can lead to some
false negatives, since before reload is completed the length of
20311 branch instructions is not known, so gcc defaults to returning their
20312 longest length, which in turn sets the far jump attribute to true.
20314 A false negative will not result in bad code being generated, but it
20315 will result in a needless push and pop of the link register. We
20316 hope that this does not occur too often.
20318 If we need doubleword stack alignment this could affect the other
20319 elimination offsets so we can't risk getting it wrong. */
20320 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20321 cfun->machine->arg_pointer_live = 1;
20322 else if (!cfun->machine->arg_pointer_live)
20326 /* Check to see if the function contains a branch
20327 insn with the far jump attribute set. */
20328 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20330 if (GET_CODE (insn) == JUMP_INSN
20331 /* Ignore tablejump patterns. */
20332 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20333 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20334 && get_attr_far_jump (insn) == FAR_JUMP_YES
20337 /* Record the fact that we have decided that
20338 the function does use far jumps. */
20339 cfun->machine->far_jump_used = 1;
20347 /* Return nonzero if FUNC must be entered in ARM mode. */
20349 is_called_in_ARM_mode (tree func)
20351 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20353 /* Ignore the problem about functions whose address is taken. */
20354 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20358 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20364 /* Given the stack offsets and register mask in OFFSETS, decide how
20365 many additional registers to push instead of subtracting a constant
20366 from SP. For epilogues the principle is the same except we use pop.
20367 FOR_PROLOGUE indicates which we're generating. */
20369 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20371 HOST_WIDE_INT amount;
20372 unsigned long live_regs_mask = offsets->saved_regs_mask;
20373 /* Extract a mask of the ones we can give to the Thumb's push/pop
20375 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20376 /* Then count how many other high registers will need to be pushed. */
20377 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20378 int n_free, reg_base;
20380 if (!for_prologue && frame_pointer_needed)
20381 amount = offsets->locals_base - offsets->saved_regs;
20383 amount = offsets->outgoing_args - offsets->saved_regs;
20385 /* If the stack frame size is 512 exactly, we can save one load
instruction, which should make this a win even when optimizing
for speed.  */
20388 if (!optimize_size && amount != 512)
20391 /* Can't do this if there are high registers to push. */
20392 if (high_regs_pushed != 0)
20395 /* Shouldn't do it in the prologue if no registers would normally
20396 be pushed at all. In the epilogue, also allow it if we'll have
20397 a pop insn for the PC. */
20400 || TARGET_BACKTRACE
20401 || (live_regs_mask & 1 << LR_REGNUM) == 0
20402 || TARGET_INTERWORK
20403 || crtl->args.pretend_args_size != 0))
20406 /* Don't do this if thumb_expand_prologue wants to emit instructions
20407 between the push and the stack frame allocation. */
20409 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20410 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20417 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20418 live_regs_mask >>= reg_base;
20421 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20422 && (for_prologue || call_used_regs[reg_base + n_free]))
20424 live_regs_mask >>= 1;
20430 gcc_assert (amount / 4 * 4 == amount);
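/* For example, AMOUNT == 516 with four free low registers gives
   (516 - 508) / 4 == 2: two extra registers are pushed and the explicit
   stack decrement shrinks to 508, which fits in a single insn.  */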
20432 if (amount >= 512 && (amount - n_free * 4) < 512)
20433 return (amount - 508) / 4;
20434 if (amount <= n_free * 4)
20439 /* The bits which aren't usefully expanded as rtl. */
20441 thumb_unexpanded_epilogue (void)
20443 arm_stack_offsets *offsets;
20445 unsigned long live_regs_mask = 0;
20446 int high_regs_pushed = 0;
20448 int had_to_push_lr;
20451 if (cfun->machine->return_used_this_function != 0)
20454 if (IS_NAKED (arm_current_func_type ()))
20457 offsets = arm_get_frame_offsets ();
20458 live_regs_mask = offsets->saved_regs_mask;
20459 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
/* We can deduce the registers used from the function's return value.
   This is more reliable than examining df_regs_ever_live_p () because that
   will be set if the register is ever used in the function, not just if
   the register is used to hold a return value.  */
20465 size = arm_size_return_regs ();
20467 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20470 unsigned long extra_mask = (1 << extra_pop) - 1;
20471 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20474 /* The prolog may have pushed some high registers to use as
20475 work registers. e.g. the testsuite file:
20476 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20477 compiles to produce:
20478 push {r4, r5, r6, r7, lr}
20482 as part of the prolog. We have to undo that pushing here. */
20484 if (high_regs_pushed)
20486 unsigned long mask = live_regs_mask & 0xff;
/* The available low registers depend on the size of the value we are
   returning.  */

if (mask == 0)
  /* Oh dear!  We have no low registers into which we can pop
     the high registers!  */
  internal_error
    ("no low registers available for popping high registers");
20502 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20503 if (live_regs_mask & (1 << next_hi_reg))
20506 while (high_regs_pushed)
/* Find lo register(s) into which the high register(s) can
   be popped.  */
20510 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20512 if (mask & (1 << regno))
20513 high_regs_pushed--;
20514 if (high_regs_pushed == 0)
20518 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20520 /* Pop the values into the low register(s). */
20521 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20523 /* Move the value(s) into the high registers. */
20524 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20526 if (mask & (1 << regno))
20528 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20531 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20532 if (live_regs_mask & (1 << next_hi_reg))
20537 live_regs_mask &= ~0x0f00;
20540 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20541 live_regs_mask &= 0xff;
20543 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20545 /* Pop the return address into the PC. */
20546 if (had_to_push_lr)
20547 live_regs_mask |= 1 << PC_REGNUM;
20549 /* Either no argument registers were pushed or a backtrace
20550 structure was created which includes an adjusted stack
20551 pointer, so just pop everything. */
20552 if (live_regs_mask)
20553 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
/* We have either just popped the return address into the
   PC or it was kept in LR for the entire function.
20558 Note that thumb_pushpop has already called thumb_exit if the
20559 PC was in the list. */
20560 if (!had_to_push_lr)
20561 thumb_exit (asm_out_file, LR_REGNUM);
20565 /* Pop everything but the return address. */
20566 if (live_regs_mask)
20567 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20570 if (had_to_push_lr)
20574 /* We have no free low regs, so save one. */
20575 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20579 /* Get the return address into a temporary register. */
20580 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20581 1 << LAST_ARG_REGNUM);
20585 /* Move the return address to lr. */
20586 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20588 /* Restore the low register. */
20589 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20594 regno = LAST_ARG_REGNUM;
20599 /* Remove the argument registers that were pushed onto the stack. */
20600 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20601 SP_REGNUM, SP_REGNUM,
20602 crtl->args.pretend_args_size);
20604 thumb_exit (asm_out_file, regno);
20610 /* Functions to save and restore machine-specific function data. */
20611 static struct machine_function *
20612 arm_init_machine_status (void)
20614 struct machine_function *machine;
20615 machine = ggc_alloc_cleared_machine_function ();
20617 #if ARM_FT_UNKNOWN != 0
20618 machine->func_type = ARM_FT_UNKNOWN;
20623 /* Return an RTX indicating where the return address to the
20624 calling function can be found. */
20626 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20631 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20634 /* Do anything needed before RTL is emitted for each function. */
20636 arm_init_expanders (void)
20638 /* Arrange to initialize and mark the machine per-function status. */
20639 init_machine_status = arm_init_machine_status;
20641 /* This is to stop the combine pass optimizing away the alignment
20642 adjustment of va_arg. */
20643 /* ??? It is claimed that this should not be necessary. */
20645 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
/* Like arm_compute_initial_elimination_offset.  Simpler because there
20650 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20651 to point at the base of the local variables after static stack
20652 space for a function has been allocated. */
20655 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20657 arm_stack_offsets *offsets;
20659 offsets = arm_get_frame_offsets ();
20663 case ARG_POINTER_REGNUM:
20666 case STACK_POINTER_REGNUM:
20667 return offsets->outgoing_args - offsets->saved_args;
20669 case FRAME_POINTER_REGNUM:
20670 return offsets->soft_frame - offsets->saved_args;
20672 case ARM_HARD_FRAME_POINTER_REGNUM:
20673 return offsets->saved_regs - offsets->saved_args;
20675 case THUMB_HARD_FRAME_POINTER_REGNUM:
20676 return offsets->locals_base - offsets->saved_args;
20679 gcc_unreachable ();
20683 case FRAME_POINTER_REGNUM:
20686 case STACK_POINTER_REGNUM:
20687 return offsets->outgoing_args - offsets->soft_frame;
20689 case ARM_HARD_FRAME_POINTER_REGNUM:
20690 return offsets->saved_regs - offsets->soft_frame;
20692 case THUMB_HARD_FRAME_POINTER_REGNUM:
20693 return offsets->locals_base - offsets->soft_frame;
20696 gcc_unreachable ();
20701 gcc_unreachable ();
20705 /* Generate the rest of a function's prologue. */
20707 thumb1_expand_prologue (void)
20711 HOST_WIDE_INT amount;
20712 arm_stack_offsets *offsets;
20713 unsigned long func_type;
20715 unsigned long live_regs_mask;
20717 func_type = arm_current_func_type ();
20719 /* Naked functions don't have prologues. */
20720 if (IS_NAKED (func_type))
20723 if (IS_INTERRUPT (func_type))
20725 error ("interrupt Service Routines cannot be coded in Thumb mode");
20729 offsets = arm_get_frame_offsets ();
20730 live_regs_mask = offsets->saved_regs_mask;
20731 /* Load the pic register before setting the frame pointer,
20732 so we can use r7 as a temporary work register. */
20733 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20734 arm_load_pic_register (live_regs_mask);
20736 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20737 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20738 stack_pointer_rtx);
20740 if (flag_stack_usage)
20741 current_function_static_stack_size
20742 = offsets->outgoing_args - offsets->saved_args;
20744 amount = offsets->outgoing_args - offsets->saved_regs;
20745 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20750 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20751 GEN_INT (- amount)));
20752 RTX_FRAME_RELATED_P (insn) = 1;
20758 /* The stack decrement is too big for an immediate value in a single
20759 insn. In theory we could issue multiple subtracts, but after
20760 three of them it becomes more space efficient to place the full
20761 value in the constant pool and load into a register. (Also the
20762 ARM debugger really likes to see only one stack decrement per
20763 function). So instead we look for a scratch register into which
20764 we can load the decrement, and then we subtract this from the
20765 stack pointer. Unfortunately on the thumb the only available
20766 scratch registers are the argument registers, and we cannot use
20767 these as they may hold arguments to the function. Instead we
20768 attempt to locate a call preserved register which is used by this
20769 function. If we can find one, then we know that it will have
been pushed at the start of the prologue and so we can corrupt
it now.  */
20772 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20773 if (live_regs_mask & (1 << regno))
20776 gcc_assert(regno <= LAST_LO_REGNUM);
20778 reg = gen_rtx_REG (SImode, regno);
20780 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20782 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20783 stack_pointer_rtx, reg));
20784 RTX_FRAME_RELATED_P (insn) = 1;
20785 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20786 plus_constant (stack_pointer_rtx,
20788 RTX_FRAME_RELATED_P (dwarf) = 1;
20789 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20793 if (frame_pointer_needed)
20794 thumb_set_frame_pointer (offsets);
20796 /* If we are profiling, make sure no instructions are scheduled before
20797 the call to mcount. Similarly if the user has requested no
20798 scheduling in the prolog. Similarly if we want non-call exceptions
20799 using the EABI unwinder, to prevent faulting instructions from being
20800 swapped with a stack adjustment. */
20801 if (crtl->profile || !TARGET_SCHED_PROLOG
20802 || (arm_except_unwind_info (&global_options) == UI_TARGET
20803 && cfun->can_throw_non_call_exceptions))
20804 emit_insn (gen_blockage ());
20806 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20807 if (live_regs_mask & 0xff)
20808 cfun->machine->lr_save_eliminated = 0;
20813 thumb1_expand_epilogue (void)
20815 HOST_WIDE_INT amount;
20816 arm_stack_offsets *offsets;
/* Naked functions don't have epilogues.  */
20820 if (IS_NAKED (arm_current_func_type ()))
20823 offsets = arm_get_frame_offsets ();
20824 amount = offsets->outgoing_args - offsets->saved_regs;
20826 if (frame_pointer_needed)
20828 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20829 amount = offsets->locals_base - offsets->saved_regs;
20831 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20833 gcc_assert (amount >= 0);
20837 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20838 GEN_INT (amount)));
20841 /* r3 is always free in the epilogue. */
20842 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20844 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20845 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20849 /* Emit a USE (stack_pointer_rtx), so that
20850 the stack adjustment will not be deleted. */
20851 emit_insn (gen_prologue_use (stack_pointer_rtx));
20853 if (crtl->profile || !TARGET_SCHED_PROLOG)
20854 emit_insn (gen_blockage ());
20856 /* Emit a clobber for each insn that will be restored in the epilogue,
20857 so that flow2 will get register lifetimes correct. */
20858 for (regno = 0; regno < 13; regno++)
20859 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20860 emit_clobber (gen_rtx_REG (SImode, regno));
20862 if (! df_regs_ever_live_p (LR_REGNUM))
20863 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20867 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20869 arm_stack_offsets *offsets;
20870 unsigned long live_regs_mask = 0;
20871 unsigned long l_mask;
20872 unsigned high_regs_pushed = 0;
20873 int cfa_offset = 0;
20876 if (IS_NAKED (arm_current_func_type ()))
20879 if (is_called_in_ARM_mode (current_function_decl))
20883 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20884 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20886 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20888 /* Generate code sequence to switch us into Thumb mode. */
20889 /* The .code 32 directive has already been emitted by
20890 ASM_DECLARE_FUNCTION_NAME. */
20891 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20892 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20894 /* Generate a label, so that the debugger will notice the
20895 change in instruction sets. This label is also used by
20896 the assembler to bypass the ARM code when this function
20897 is called from a Thumb encoded function elsewhere in the
20898 same file. Hence the definition of STUB_NAME here must
20899 agree with the definition in gas/config/tc-arm.c. */
20901 #define STUB_NAME ".real_start_of"
20903 fprintf (f, "\t.code\t16\n");
20905 if (arm_dllexport_name_p (name))
20906 name = arm_strip_name_encoding (name);
20908 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20909 fprintf (f, "\t.thumb_func\n");
20910 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20913 if (crtl->args.pretend_args_size)
20915 /* Output unwind directive for the stack adjustment. */
20916 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20917 fprintf (f, "\t.pad #%d\n",
20918 crtl->args.pretend_args_size);
20920 if (cfun->machine->uses_anonymous_args)
20924 fprintf (f, "\tpush\t{");
20926 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20928 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20929 regno <= LAST_ARG_REGNUM;
20931 asm_fprintf (f, "%r%s", regno,
20932 regno == LAST_ARG_REGNUM ? "" : ", ");
20934 fprintf (f, "}\n");
20937 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20938 SP_REGNUM, SP_REGNUM,
20939 crtl->args.pretend_args_size);
20941 /* We don't need to record the stores for unwinding (would it
20942 help the debugger any if we did?), but record the change in
20943 the stack pointer. */
20944 if (dwarf2out_do_frame ())
20946 char *l = dwarf2out_cfi_label (false);
20948 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20949 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20953 /* Get the registers we are going to push. */
20954 offsets = arm_get_frame_offsets ();
20955 live_regs_mask = offsets->saved_regs_mask;
20956 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20957 l_mask = live_regs_mask & 0x40ff;
20958 /* Then count how many other high registers will need to be pushed. */
20959 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20961 if (TARGET_BACKTRACE)
20964 unsigned work_register;
20966 /* We have been asked to create a stack backtrace structure.
20967 The code looks like this:
20971 0 sub SP, #16 Reserve space for 4 registers.
20972 2 push {R7} Push low registers.
20973 4 add R7, SP, #20 Get the stack pointer before the push.
20974 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20975 8 mov R7, PC Get hold of the start of this code plus 12.
20976 10 str R7, [SP, #16] Store it.
20977 12 mov R7, FP Get hold of the current frame pointer.
20978 14 str R7, [SP, #4] Store it.
20979 16 mov R7, LR Get hold of the current return address.
20980 18 str R7, [SP, #12] Store it.
20981 20 add R7, SP, #16 Point at the start of the backtrace structure.
20982 22 mov FP, R7 Put this value into the frame pointer. */
20984 work_register = thumb_find_work_register (live_regs_mask);
20986 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20987 asm_fprintf (f, "\t.pad #16\n");
20990 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20991 SP_REGNUM, SP_REGNUM);
20993 if (dwarf2out_do_frame ())
20995 char *l = dwarf2out_cfi_label (false);
20997 cfa_offset = cfa_offset + 16;
20998 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21003 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21004 offset = bit_count (l_mask) * UNITS_PER_WORD;
21009 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21010 offset + 16 + crtl->args.pretend_args_size);
21012 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21015 /* Make sure that the instruction fetching the PC is in the right place
21016 to calculate "start of backtrace creation code + 12". */
21019 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21020 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21022 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21023 ARM_HARD_FRAME_POINTER_REGNUM);
21024 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21029 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21030 ARM_HARD_FRAME_POINTER_REGNUM);
21031 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21033 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21034 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21038 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21039 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21041 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21043 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21044 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21046 /* Optimization: If we are not pushing any low registers but we are going
21047 to push some high registers then delay our first push. This will just
be a push of LR and we can combine it with the push of the first high
register.  */
21050 else if ((l_mask & 0xff) != 0
21051 || (high_regs_pushed == 0 && l_mask))
21053 unsigned long mask = l_mask;
21054 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21055 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21058 if (high_regs_pushed)
21060 unsigned pushable_regs;
21061 unsigned next_hi_reg;
21063 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21064 if (live_regs_mask & (1 << next_hi_reg))
21067 pushable_regs = l_mask & 0xff;
21069 if (pushable_regs == 0)
21070 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21072 while (high_regs_pushed > 0)
21074 unsigned long real_regs_mask = 0;
21076 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21078 if (pushable_regs & (1 << regno))
21080 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21082 high_regs_pushed --;
21083 real_regs_mask |= (1 << next_hi_reg);
21085 if (high_regs_pushed)
21087 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21089 if (live_regs_mask & (1 << next_hi_reg))
21094 pushable_regs &= ~((1 << regno) - 1);
21100 /* If we had to find a work register and we have not yet
21101 saved the LR then add it to the list of regs to push. */
21102 if (l_mask == (1 << LR_REGNUM))
21104 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21106 real_regs_mask | (1 << LR_REGNUM));
21110 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21115 /* Handle the case of a double word load into a low register from
21116 a computed memory address. The computed address may involve a
21117 register which is overwritten by the load. */
21119 thumb_load_double_from_address (rtx *operands)
21127 gcc_assert (GET_CODE (operands[0]) == REG);
21128 gcc_assert (GET_CODE (operands[1]) == MEM);
21130 /* Get the memory address. */
21131 addr = XEXP (operands[1], 0);
21133 /* Work out how the memory address is computed. */
21134 switch (GET_CODE (addr))
21137 operands[2] = adjust_address (operands[1], SImode, 4);
21139 if (REGNO (operands[0]) == REGNO (addr))
21141 output_asm_insn ("ldr\t%H0, %2", operands);
21142 output_asm_insn ("ldr\t%0, %1", operands);
21146 output_asm_insn ("ldr\t%0, %1", operands);
21147 output_asm_insn ("ldr\t%H0, %2", operands);
21152 /* Compute <address> + 4 for the high order load. */
21153 operands[2] = adjust_address (operands[1], SImode, 4);
21155 output_asm_insn ("ldr\t%0, %1", operands);
21156 output_asm_insn ("ldr\t%H0, %2", operands);
21160 arg1 = XEXP (addr, 0);
21161 arg2 = XEXP (addr, 1);
21163 if (CONSTANT_P (arg1))
21164 base = arg2, offset = arg1;
21166 base = arg1, offset = arg2;
21168 gcc_assert (GET_CODE (base) == REG);
21170 /* Catch the case of <address> = <reg> + <reg> */
21171 if (GET_CODE (offset) == REG)
21173 int reg_offset = REGNO (offset);
21174 int reg_base = REGNO (base);
21175 int reg_dest = REGNO (operands[0]);
21177 /* Add the base and offset registers together into the
21178 higher destination register. */
21179 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21180 reg_dest + 1, reg_base, reg_offset);
21182 /* Load the lower destination register from the address in
21183 the higher destination register. */
21184 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21185 reg_dest, reg_dest + 1);
/* Load the higher destination register from its own address
   plus 4.  */
21189 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21190 reg_dest + 1, reg_dest + 1);
21194 /* Compute <address> + 4 for the high order load. */
21195 operands[2] = adjust_address (operands[1], SImode, 4);
21197 /* If the computed address is held in the low order register
21198 then load the high order register first, otherwise always
21199 load the low order register first. */
21200 if (REGNO (operands[0]) == REGNO (base))
21202 output_asm_insn ("ldr\t%H0, %2", operands);
21203 output_asm_insn ("ldr\t%0, %1", operands);
21207 output_asm_insn ("ldr\t%0, %1", operands);
21208 output_asm_insn ("ldr\t%H0, %2", operands);
/* With no registers to worry about we can just load the value
   directly.  */
operands[2] = adjust_address (operands[1], SImode, 4);
21218 output_asm_insn ("ldr\t%H0, %2", operands);
21219 output_asm_insn ("ldr\t%0, %1", operands);
21223 gcc_unreachable ();
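/* Note the ordering in the REG case above: if the destination pair is
   r0:r1 and the address is in r0, loading r0 first would destroy the
   address, so the high word is fetched first ("ldr r1, [r0, #4]" then
   "ldr r0, [r0]").  */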
21230 thumb_output_move_mem_multiple (int n, rtx *operands)
21237 if (REGNO (operands[4]) > REGNO (operands[5]))
21240 operands[4] = operands[5];
21243 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21244 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21248 if (REGNO (operands[4]) > REGNO (operands[5]))
21251 operands[4] = operands[5];
21254 if (REGNO (operands[5]) > REGNO (operands[6]))
21257 operands[5] = operands[6];
21260 if (REGNO (operands[4]) > REGNO (operands[5]))
21263 operands[4] = operands[5];
21267 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21268 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21272 gcc_unreachable ();
21278 /* Output a call-via instruction for thumb state. */
21280 thumb_call_via_reg (rtx reg)
21282 int regno = REGNO (reg);
21285 gcc_assert (regno < LR_REGNUM);
21287 /* If we are in the normal text section we can use a single instance
21288 per compilation unit. If we are doing function sections, then we need
21289 an entry per section, since we can't rely on reachability. */
21290 if (in_section == text_section)
21292 thumb_call_reg_needed = 1;
21294 if (thumb_call_via_label[regno] == NULL)
21295 thumb_call_via_label[regno] = gen_label_rtx ();
21296 labelp = thumb_call_via_label + regno;
21300 if (cfun->machine->call_via[regno] == NULL)
21301 cfun->machine->call_via[regno] = gen_label_rtx ();
21302 labelp = cfun->machine->call_via + regno;
21305 output_asm_insn ("bl\t%a0", labelp);
21309 /* Routines for generating rtl. */
21311 thumb_expand_movmemqi (rtx *operands)
21313 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21314 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21315 HOST_WIDE_INT len = INTVAL (operands[2]);
21316 HOST_WIDE_INT offset = 0;
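/* For instance, a 7-byte copy below expands to one SImode move, one
   HImode move and one QImode move.  */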
21320 emit_insn (gen_movmem12b (out, in, out, in));
21326 emit_insn (gen_movmem8b (out, in, out, in));
21332 rtx reg = gen_reg_rtx (SImode);
21333 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21334 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21341 rtx reg = gen_reg_rtx (HImode);
21342 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21343 plus_constant (in, offset))));
21344 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21352 rtx reg = gen_reg_rtx (QImode);
21353 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21354 plus_constant (in, offset))));
21355 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21361 thumb_reload_out_hi (rtx *operands)
21363 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21366 /* Handle reading a half-word from memory during reload. */
21368 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21370 gcc_unreachable ();
21373 /* Return the length of a function name prefix
21374 that starts with the character 'c'. */
21376 arm_get_strip_length (int c)
21380 ARM_NAME_ENCODING_LENGTHS
21385 /* Return a pointer to a function's name with any
21386 and all prefix encodings stripped from it. */
21388 arm_strip_name_encoding (const char *name)
21392 while ((skip = arm_get_strip_length (* name)))
21398 /* If there is a '*' anywhere in the name's prefix, then
21399 emit the stripped name verbatim, otherwise prepend an
21400 underscore if leading underscores are being used. */
21402 arm_asm_output_labelref (FILE *stream, const char *name)
21407 while ((skip = arm_get_strip_length (* name)))
21409 verbatim |= (*name == '*');
21414 fputs (name, stream);
21416 asm_fprintf (stream, "%U%s", name);
21420 arm_file_start (void)
21424 if (TARGET_UNIFIED_ASM)
21425 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21429 const char *fpu_name;
21430 if (arm_selected_arch)
21431 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21433 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21435 if (TARGET_SOFT_FLOAT)
21438 fpu_name = "softvfp";
21440 fpu_name = "softfpa";
21444 fpu_name = arm_fpu_desc->name;
21445 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21447 if (TARGET_HARD_FLOAT)
21448 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21449 if (TARGET_HARD_FLOAT_ABI)
21450 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21453 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21455 /* Some of these attributes only apply when the corresponding features
21456 are used. However we don't have any easy way of figuring this out.
21457 Conservatively record the setting that would have been used. */
21459 /* Tag_ABI_FP_rounding. */
21460 if (flag_rounding_math)
21461 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21462 if (!flag_unsafe_math_optimizations)
/* Tag_ABI_FP_denormal.  */
21465 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21466 /* Tag_ABI_FP_exceptions. */
21467 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21469 /* Tag_ABI_FP_user_exceptions. */
21470 if (flag_signaling_nans)
21471 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21472 /* Tag_ABI_FP_number_model. */
21473 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21474 flag_finite_math_only ? 1 : 3);
21476 /* Tag_ABI_align8_needed. */
21477 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21478 /* Tag_ABI_align8_preserved. */
21479 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21480 /* Tag_ABI_enum_size. */
21481 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21482 flag_short_enums ? 1 : 2);
21484 /* Tag_ABI_optimization_goals. */
21487 else if (optimize >= 2)
21493 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21495 /* Tag_ABI_FP_16bit_format. */
21496 if (arm_fp16_format)
21497 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21498 (int)arm_fp16_format);
21500 if (arm_lang_output_object_attributes_hook)
21501 arm_lang_output_object_attributes_hook();
21503 default_file_start();
21507 arm_file_end (void)
21511 if (NEED_INDICATE_EXEC_STACK)
21512 /* Add .note.GNU-stack. */
21513 file_end_indicate_exec_stack ();
21515 if (! thumb_call_reg_needed)
21518 switch_to_section (text_section);
21519 asm_fprintf (asm_out_file, "\t.code 16\n");
21520 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21522 for (regno = 0; regno < LR_REGNUM; regno++)
21524 rtx label = thumb_call_via_label[regno];
21528 targetm.asm_out.internal_label (asm_out_file, "L",
21529 CODE_LABEL_NUMBER (label));
21530 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21536 /* Symbols in the text segment can be accessed without indirecting via the
21537 constant pool; it may take an extra binary operation, but this is still
21538 faster than indirecting via memory. Don't do this when not optimizing,
since we won't be calculating all of the offsets necessary to do this
simplification.  */
21543 arm_encode_section_info (tree decl, rtx rtl, int first)
21545 if (optimize > 0 && TREE_CONSTANT (decl))
21546 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21548 default_encode_section_info (decl, rtl, first);
21550 #endif /* !ARM_PE */
21553 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21555 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21556 && !strcmp (prefix, "L"))
21558 arm_ccfsm_state = 0;
21559 arm_target_insn = NULL;
21561 default_internal_label (stream, prefix, labelno);
21564 /* Output code to add DELTA to the first argument, and then jump
21565 to FUNCTION. Used for C++ multiple inheritance. */
21567 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21568 HOST_WIDE_INT delta,
21569 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21572 static int thunk_label = 0;
21575 int mi_delta = delta;
21576 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21578 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21581 mi_delta = - mi_delta;
21585 int labelno = thunk_label++;
21586 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
/* Thunks are entered in ARM mode when available.  */
21588 if (TARGET_THUMB1_ONLY)
21590 /* push r3 so we can use it as a temporary. */
21591 /* TODO: Omit this save if r3 is not used. */
21592 fputs ("\tpush {r3}\n", file);
21593 fputs ("\tldr\tr3, ", file);
21597 fputs ("\tldr\tr12, ", file);
21599 assemble_name (file, label);
21600 fputc ('\n', file);
21603 /* If we are generating PIC, the ldr instruction below loads
21604 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21605 the address of the add + 8, so we have:
r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
    = target + 1.
21610 Note that we have "+ 1" because some versions of GNU ld
21611 don't set the low bit of the result for R_ARM_REL32
21612 relocations against thumb function symbols.
21613 On ARMv6M this is +4, not +8. */
21614 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21615 assemble_name (file, labelpc);
21616 fputs (":\n", file);
21617 if (TARGET_THUMB1_ONLY)
21619 /* This is 2 insns after the start of the thunk, so we know it
21620 is 4-byte aligned. */
21621 fputs ("\tadd\tr3, pc, r3\n", file);
21622 fputs ("\tmov r12, r3\n", file);
21625 fputs ("\tadd\tr12, pc, r12\n", file);
21627 else if (TARGET_THUMB1_ONLY)
21628 fputs ("\tmov r12, r3\n", file);
21630 if (TARGET_THUMB1_ONLY)
21632 if (mi_delta > 255)
21634 fputs ("\tldr\tr3, ", file);
21635 assemble_name (file, label);
21636 fputs ("+4\n", file);
21637 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21638 mi_op, this_regno, this_regno);
21640 else if (mi_delta != 0)
21642 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21643 mi_op, this_regno, this_regno,
21649 /* TODO: Use movw/movt for large constants when available. */
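/* The delta is peeled off eight bits at a time, each chunk aligned on an
   even bit boundary so that it is a valid immediate; e.g. a delta of
   0x1004 becomes an add of #4 followed by an add of #4096.  */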
21650 while (mi_delta != 0)
21652 if ((mi_delta & (3 << shift)) == 0)
21656 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21657 mi_op, this_regno, this_regno,
21658 mi_delta & (0xff << shift));
21659 mi_delta &= ~(0xff << shift);
21666 if (TARGET_THUMB1_ONLY)
21667 fputs ("\tpop\t{r3}\n", file);
21669 fprintf (file, "\tbx\tr12\n");
21670 ASM_OUTPUT_ALIGN (file, 2);
21671 assemble_name (file, label);
21672 fputs (":\n", file);
21675 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21676 rtx tem = XEXP (DECL_RTL (function), 0);
21677 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21678 tem = gen_rtx_MINUS (GET_MODE (tem),
21680 gen_rtx_SYMBOL_REF (Pmode,
21681 ggc_strdup (labelpc)));
21682 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21685 /* Output ".word .LTHUNKn". */
21686 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21688 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21689 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21693 fputs ("\tb\t", file);
21694 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21695 if (NEED_PLT_RELOC)
21696 fputs ("(PLT)", file);
21697 fputc ('\n', file);
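
/* For illustration only (register numbers and the exact sequence vary
   with the options handled above): a non-PIC, ARM-state thunk with a
   DELTA of 4 jumping to "function" would come out roughly as

	add	r0, r0, #4
	b	function(PLT)

   with r1 used in place of r0 when the return value is passed by
   reference.  */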
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element = CONST_VECTOR_ELT (x, i);

      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
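
/* As an example of the padding above: HFmode 1.0 has the target bit
   pattern 0x3c00, so on a little-endian target the pool entry is the
   2-byte value 0x3c00 followed by 2 bytes of zeros, i.e. the 4-byte
   word 0x00003c00 that the ldr fetches.  */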
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
                            enum machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
        nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 1);

  return reg_overlap_mentioned_p (value, addr);
}
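
/* By way of example (illustrative instructions, not taken from any
   particular pattern): after a producer "mul r1, r2, r3", the consumer
   "str r4, [r1, #4]" needs r1 to form its address, so
   arm_early_store_addr_dep returns nonzero for that pair, whereas
   "str r1, [r4]" only uses r1 as the stored value and yields zero.  */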
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
        return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
        return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
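
/* For instance (illustrative only): with producer "mul r1, r2, r3" and
   consumer "mla r0, r4, r5, r1", the accumulator r1 is exactly the
   multiplication result and is not one of the multiplicands, so the
   function above returns nonzero; it returns zero for
   "mla r0, r1, r5, r6", where the result instead feeds a multiplicand.  */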
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (addr, delta);
        }
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
      case DImode:
        if (TARGET_NEON_VECTORIZE_QUAD)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
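
/* So, for example, it is safe for the middle-end to treat an SImode
   shift by a variable amount of 257 as a shift by 1 (257 & 255 == 1),
   while no such folding may be applied to DImode shifts, whose
   out-of-range behavior is unspecified here.  */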
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
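
/* Concretely: the core registers map straight through (r0-r15 are
   DWARF 0-15), the first FPA register is emitted as 96 on AAPCS-based
   targets and as the legacy 16 elsewhere, and the VFPv3 registers
   beyond the single-precision range land in the 256+ D-register
   numbering; d16, for example, comes out as 272.  */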
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
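
/* As an example of the directives produced here: a prologue that
   executes "push {r4, r5, lr}" is annotated as

	.save {r4, r5, lr}

   while a VFP store-multiple of d8-d9 would instead come out through
   the .vsave branch above as ".vsave {d8, d9}".  */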
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
		     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
	{
	  /* Stack pointer save before alignment.  */
	  reg = REGNO (e0);
	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
		       reg + 0x90, reg);
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return true;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_EXCEPT_UNWIND_INFO.  */

static enum unwind_info_type
arm_except_unwind_info (struct gcc_options *opts)
{
  /* Honor the --enable-sjlj-exceptions configure switch.  */
#ifdef CONFIG_SJLJ_EXCEPTIONS
  if (CONFIG_SJLJ_EXCEPTIONS)
    return UI_SJLJ;
#endif

  /* If not using ARM EABI unwind tables... */
  if (ARM_UNWIND_INFO)
    {
      /* For simplicity elsewhere in this file, indicate that all unwind
	 info is disabled if we're not emitting unwind tables.  */
      if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
	return UI_NONE;
      else
	return UI_TARGET;
    }

  /* ... we use sjlj exceptions for backwards compatibility.  */
  return UI_SJLJ;
}
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:  fputs ("(tlsgd)", fp); break;
    case TLS_LDM32: fputs ("(tlsldm)", fp); break;
    case TLS_LDO32: fputs ("(tlsldo)", fp); break;
    case TLS_IE32:  fputs ("(gottpoff)", fp); break;
    case TLS_LE32:  fputs ("(tpoff)", fp); break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return true;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return true;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return true;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return true;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return false;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa5:
    case cortexa8:
    case cortexa9:
    case fa726te:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
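
/* For example, under this scheme the NEON type int8x8_t (V8QImode with
   element type __builtin_neon_qi) is mangled as "15__simd64_int8_t", so
   a C++ function "void f (int8x8_t)" assembles to the symbol
   _Z1f15__simd64_int8_t rather than using the generic vector mangling.  */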
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* Legitimize a memory reference for sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these
   instructions.  */
static rtx
arm_legitimize_sync_memory (rtx memory)
{
  rtx addr = force_reg (Pmode, XEXP (memory, 0));
  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);

  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
  return legitimate_memory;
}
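
/* E.g. a reference such as (mem (plus (reg r4) (const_int 8))) is
   rewritten as (mem (reg rN)) after first emitting rN := r4 + 8, since
   the ldrex/strex patterns used below only accept a bare register
   address.  */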
/* An instruction emitter.  */
typedef void (* emit_f) (int label, const char *, rtx *);

/* An instruction emitter that emits via the conventional
   output_asm_insn.  */
static void
arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
{
  output_asm_insn (pattern, operands);
}

/* Count the number of emitted synchronization instructions.  */
static unsigned arm_insn_count;

/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
static void
arm_count (int label,
	   const char *pattern ATTRIBUTE_UNUSED,
	   rtx *operands ATTRIBUTE_UNUSED)
{
  if (! label)
    ++ arm_insn_count;
}
/* Construct a pattern using conventional output formatting and feed
   it to output_asm_insn.  Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
static void ATTRIBUTE_PRINTF_4
arm_output_asm_insn (emit_f emit, int label, rtx *operands,
		     const char *pattern, ...)
{
  va_list ap;
  char buffer[256];

  va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
}
/* Emit the memory barrier instruction, if any, provided by this
   target to a specified emitter.  */
static void
arm_process_output_memory_barrier (emit_f emit, rtx *operands)
{
  if (TARGET_HAVE_DMB)
    {
      /* Note we issue a system level barrier.  We should consider
	 issuing an inner shareability zone barrier here instead, ie.
	 "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }

  if (TARGET_HAVE_DMB_MCR)
    {
      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
      return;
    }

  gcc_unreachable ();
}

/* Emit the memory barrier instruction, if any, provided by this
   target.  */
const char *
arm_output_memory_barrier (rtx *operands)
{
  arm_process_output_memory_barrier (arm_emit, operands);
  return "";
}
/* Helper to figure out the instruction suffix required on ldrex/strex
   for operations on an object of the specified mode.  */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default:
      gcc_unreachable ();
    }
  return "";
}
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_ldrex (emit_f emit,
		  enum machine_mode mode,
		  rtx target,
		  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[2];

  operands[0] = target;
  operands[1] = memory;
  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
}

/* Emit a strex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_strex (emit_f emit,
		  enum machine_mode mode,
		  const char *cc,
		  rtx result,
		  rtx value,
		  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[3];

  operands[0] = result;
  operands[1] = value;
  operands[2] = memory;
  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
		       cc);
}
/* Helper to emit a two operand instruction.  */
static void
arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
{
  rtx operands[2];

  operands[0] = d;
  operands[1] = s;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
}

/* Helper to emit a three operand instruction.  */
static void
arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
{
  rtx operands[3];

  operands[0] = d;
  operands[1] = a;
  operands[2] = b;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
}
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:

   RTX register or const_int representing the required old_value for
   the modify to continue, if NULL no comparison is performed.  */
static void
arm_output_sync_loop (emit_f emit,
		      enum machine_mode mode,
		      rtx old_value,
		      rtx memory,
		      rtx required_value,
		      rtx new_value,
		      rtx t1,
		      rtx t2,
		      enum attr_sync_op sync_op,
		      int early_barrier_required)
{
  rtx operands[1];

  gcc_assert (t1 != t2);

  if (early_barrier_required)
    arm_process_output_memory_barrier (emit, NULL);

  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);

  arm_output_ldrex (emit, mode, old_value, memory);

  if (required_value)
    {
      rtx operands[2];

      operands[0] = old_value;
      operands[1] = required_value;
      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=",
			   LOCAL_LABEL_PREFIX);
    }

  switch (sync_op)
    {
    case SYNC_OP_ADD:  arm_output_op3 (emit, "add", t1, old_value, new_value); break;
    case SYNC_OP_SUB:  arm_output_op3 (emit, "sub", t1, old_value, new_value); break;
    case SYNC_OP_IOR:  arm_output_op3 (emit, "orr", t1, old_value, new_value); break;
    case SYNC_OP_XOR:  arm_output_op3 (emit, "eor", t1, old_value, new_value); break;
    case SYNC_OP_AND:  arm_output_op3 (emit, "and", t1, old_value, new_value); break;
    case SYNC_OP_NAND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      arm_output_op2 (emit, "mvn", t1, t1);
      break;
    case SYNC_OP_NONE:
      t1 = new_value;
      break;
    }

  if (t2)
    {
      arm_output_strex (emit, mode, "", t2, t1, memory);
      operands[0] = t2;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
			   LOCAL_LABEL_PREFIX);
    }
  else
    {
      /* Use old_value for the return value because for some operations
	 the old_value can easily be restored.  This saves one register.  */
      arm_output_strex (emit, mode, "", old_value, t1, memory);
      operands[0] = old_value;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
			   LOCAL_LABEL_PREFIX);

      /* Restore the old value.  */
      switch (sync_op)
	{
	case SYNC_OP_ADD:  arm_output_op3 (emit, "sub", old_value, t1, new_value); break;
	case SYNC_OP_SUB:  arm_output_op3 (emit, "add", old_value, t1, new_value); break;
	case SYNC_OP_XOR:  arm_output_op3 (emit, "eor", old_value, t1, new_value); break;
	case SYNC_OP_NONE: arm_output_op2 (emit, "mov", old_value, required_value); break;
	default:
	  gcc_unreachable ();
	}
    }

  arm_process_output_memory_barrier (emit, NULL);
  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
}
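
/* For reference, the loop above for an SImode atomic add (no
   required_value, with a distinct t2) comes out roughly as

	dmb	sy
   1:	ldrex	r0, [r1]
	add	r2, r0, r3
	strex	r4, r2, [r1]
	teq	r4, #0
	bne	1b
	dmb	sy

   where "1:" stands in for the LSYT label built from LOCAL_LABEL_PREFIX
   above and the register numbers are illustrative only.  */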
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND (result, 0);
  memory = FETCH_SYNC_OPERAND (memory, 0);
  required_value = FETCH_SYNC_OPERAND (required_value, 0);
  new_value = FETCH_SYNC_OPERAND (new_value, 0);
  t1 = FETCH_SYNC_OPERAND (t1, 0);
  t2 = FETCH_SYNC_OPERAND (t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
			new_value, t1, t2, sync_op, early_barrier);
}

/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions, it just counts instructions, being careful not
   to count labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
		    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
		 struct arm_sync_generator *generator,
		 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
	required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
				     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    {
      emit_insn (arm_call_generator (generator, target, memory, required_value,
				     new_value));
    }
}
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
	   regno <= LAST_FPA_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
	{
	  for (regno = FIRST_FPA_REGNUM;
	       regno <= LAST_FPA_REGNUM; ++ regno)
	    fixed_regs[regno] = call_used_regs[regno] = 1;
	  for (regno = FIRST_CIRRUS_FP_REGNUM;
	       regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
	    }
	}
      if (TARGET_VFP)
	{
	  /* VFPv3 registers are disabled when earlier VFP
	     versions are selected due to the definition of
	     LAST_VFP_REGNUM.  */
	  for (regno = FIRST_VFP_REGNUM;
	       regno <= LAST_VFP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
		|| regno >= FIRST_VFP_REGNUM + 32;
	    }
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

#include "gt-arm.h"