/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help
#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef  TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
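
/* A quick check of the arithmetic above (a reading aid): an anchor's
   block spans offsets -4088 through 4095 plus the anchor byte itself,
   and 4088 + 1 + 4095 = 8184 = 8 * 1023, a multiple of eight.  */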

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
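
/* A worked example of how the lattice above composes: expanding the
   chain gives

     FL_FOR_ARCH6 = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                    | FL_ARCH5 | FL_ARCH5E | FL_THUMB | FL_ARCH6

   so FL_FOR_ARCH6M = FL_FOR_ARCH6 & ~FL_NOTM simply strips the
   "not present in the M profile" instructions for the M-profile
   ARMv6-M, while FL_FOR_ARCH7A adds FL_NOTM back for the A profile.  */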
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU we select.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",           ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",           ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",            ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
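
/* A worked example of the loop above: for value = 0b101100 the
   successive "value &= value - 1" steps yield 0b101000, 0b100000 and
   then 0, so the body runs once per set bit and the function returns 3.
   The hypothetical self-test below (a sketch, never built as part of
   the compiler) checks that property against a naive per-bit count.  */
#if 0
#include <assert.h>

static unsigned
naive_bit_count (unsigned long value)
{
  unsigned count = 0;
  for (; value != 0; value >>= 1)
    count += value & 1;
  return count;
}

static void
bit_count_selftest (void)
{
  assert (bit_count (0x2CUL) == 3);  /* 0b101100 */
  assert (bit_count (0UL) == 0);
  assert (bit_count (0xFFUL) == naive_bit_count (0xFFUL));
}
#endif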
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
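
  /* An illustrative note (a reading aid): because __aeabi_idivmod
     returns the quotient in r0 and the remainder in r1, a source
     fragment such as

         int q = a / b;
         int r = a % b;

     can be satisfied by a single libcall, the two results simply
     being read from the two result registers.  */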

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
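
/* An illustrative sketch (hypothetical user code, not part of this
   file): with the HFmode optabs above left NULL, an __fp16 operation
   is performed in SFmode, so when software conversion is in use the
   multiply below expands to an __gnu_h2f_* widening call, an SFmode
   multiply, and an __gnu_f2h_* narrowing call.  */
#if 0
__fp16 scale (__fp16 x)
{
  return x * 2.0f;
}
#endif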
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
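
/* A user-level illustration (hypothetical example, not part of this
   file): the hooks above are what make ordinary stdarg code work on an
   AAPCS target, where va_list is struct __va_list { void *__ap; }.  */
#if 0
#include <stdarg.h>

static int
sum_ints (int n, ...)
{
  va_list ap;
  int total = 0;

  va_start (ap, n);
  while (n-- > 0)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}
#endif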
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    if (streq (name, sel->name))
      return sel;

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;
    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;
    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;
    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;
    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;
    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p = getenv ("COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);
          if (value > 0)
            columns = value;
        }
      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);
      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);
      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1695 /* If we are not using the default (ARM mode) section anchor offset
1696 ranges, then set the correct ranges now. */
1697 if (TARGET_THUMB1)
1698   {
1699     /* Thumb-1 LDR instructions cannot have negative offsets.
1700 Permissible positive offset ranges are 5-bit (for byte loads),
1701 6-bit (for halfword loads), or 7-bit (for word loads).
1702 Empirical results suggest a 7-bit anchor range gives the best
1703 overall code size. */
1704 targetm.min_anchor_offset = 0;
1705 targetm.max_anchor_offset = 127;
1707 else if (TARGET_THUMB2)
1709 /* The minimum is set such that the total size of the block
1710 for a particular anchor is 248 + 1 + 4095 bytes, which is
1711 divisible by eight, ensuring natural spacing of anchors. */
1712 targetm.min_anchor_offset = -248;
1713 targetm.max_anchor_offset = 4095;
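/* Editor's illustration (not in the original source): section anchors let
   several globals share one base-address computation.  Given

       int a, b;                      /* laid out a few bytes apart */
       void f (void) { a = 1; b = 2; }

   the compiler materializes one anchor address and accesses both objects
   as anchor+offset, provided each offset lies within the
   [min_anchor_offset, max_anchor_offset] range configured above.  */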
1716 /* V5 code we generate is completely interworking capable, so we turn off
1717 TARGET_INTERWORK here to avoid many tests later on. */
1719 /* XXX However, we must pass the right pre-processor defines to CPP
1720 or GLD can get confused. This is a hack. */
1721 if (TARGET_INTERWORK)
1722 arm_cpp_interwork = 1;
1724 if (arm_arch5)
1725   target_flags &= ~MASK_INTERWORK;
1727 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1728 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1730 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1731 error ("iwmmxt abi requires an iwmmxt capable cpu");
1733 if (target_fpu_name == NULL && target_fpe_name != NULL)
1735 if (streq (target_fpe_name, "2"))
1736 target_fpu_name = "fpe2";
1737 else if (streq (target_fpe_name, "3"))
1738 target_fpu_name = "fpe3";
1739 else
1740   error ("invalid floating point emulation option: -mfpe=%s",
1741          target_fpe_name);
1744 if (target_fpu_name == NULL)
1746 #ifdef FPUTYPE_DEFAULT
1747 target_fpu_name = FPUTYPE_DEFAULT;
1748 #else
1749 if (arm_arch_cirrus)
1750   target_fpu_name = "maverick";
1751 else
1752   target_fpu_name = "fpe2";
1753 #endif
1756 arm_fpu_desc = NULL;
1757 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1759 if (streq (all_fpus[i].name, target_fpu_name))
1761   arm_fpu_desc = &all_fpus[i];
1762   break;
1766 if (!arm_fpu_desc)
1767   {
1768     error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1769     return;
1770   }
1772 switch (arm_fpu_desc->model)
1774 case ARM_FP_MODEL_FPA:
1775 if (arm_fpu_desc->rev == 2)
1776 arm_fpu_attr = FPU_FPE2;
1777 else if (arm_fpu_desc->rev == 3)
1778 arm_fpu_attr = FPU_FPE3;
1780   arm_fpu_attr = FPU_FPA;
1781 break;
1783 case ARM_FP_MODEL_MAVERICK:
1784 arm_fpu_attr = FPU_MAVERICK;
1785 break;
1787 case ARM_FP_MODEL_VFP:
1788 arm_fpu_attr = FPU_VFP;
1789 break;
1791 default:
1792   gcc_unreachable ();
1793 }
1795 if (target_float_abi_name != NULL)
1797 /* The user specified a FP ABI. */
1798 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1800 if (streq (all_float_abis[i].name, target_float_abi_name))
1802 arm_float_abi = all_float_abis[i].abi_type;
1803 break;
1806 if (i == ARRAY_SIZE (all_float_abis))
1807 error ("invalid floating point abi: -mfloat-abi=%s",
1808 target_float_abi_name);
1810 else
1811   arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1813 if (TARGET_AAPCS_BASED
1814 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1815 error ("FPA is unsupported in the AAPCS");
1817 if (TARGET_AAPCS_BASED)
1819 if (TARGET_CALLER_INTERWORKING)
1820 error ("AAPCS does not support -mcaller-super-interworking");
1822 if (TARGET_CALLEE_INTERWORKING)
1823 error ("AAPCS does not support -mcallee-super-interworking");
1826 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1827 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1828 will ever exist. GCC makes no attempt to support this combination. */
1829 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1830 sorry ("iWMMXt and hardware floating point");
1832 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1833 if (TARGET_THUMB2 && TARGET_IWMMXT)
1834 sorry ("Thumb-2 iWMMXt");
1836 /* __fp16 support currently assumes the core has ldrh. */
1837 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1838 sorry ("__fp16 and no ldrh");
1840 /* If soft-float is specified then don't use FPU. */
1841 if (TARGET_SOFT_FLOAT)
1842 arm_fpu_attr = FPU_NONE;
1844 if (TARGET_AAPCS_BASED)
1846 if (arm_abi == ARM_ABI_IWMMXT)
1847 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1848 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1849 && TARGET_HARD_FLOAT
1850          && TARGET_VFP)
1851   arm_pcs_default = ARM_PCS_AAPCS_VFP;
1852 else
1853   arm_pcs_default = ARM_PCS_AAPCS;
1857 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1858 sorry ("-mfloat-abi=hard and VFP");
1860 if (arm_abi == ARM_ABI_APCS)
1861 arm_pcs_default = ARM_PCS_APCS;
1862 else
1863   arm_pcs_default = ARM_PCS_ATPCS;
1866 /* For arm2/3 there is no need to do any scheduling if there is only
1867 a floating point emulator, or we are doing software floating-point. */
1868 if ((TARGET_SOFT_FLOAT
1869 || (TARGET_FPA && arm_fpu_desc->rev))
1870 && (tune_flags & FL_MODE32) == 0)
1871 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1873 if (target_thread_switch)
1875 if (strcmp (target_thread_switch, "soft") == 0)
1876 target_thread_pointer = TP_SOFT;
1877 else if (strcmp (target_thread_switch, "auto") == 0)
1878 target_thread_pointer = TP_AUTO;
1879 else if (strcmp (target_thread_switch, "cp15") == 0)
1880 target_thread_pointer = TP_CP15;
1881 else
1882   error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1885 /* Use the cp15 method if it is available. */
1886 if (target_thread_pointer == TP_AUTO)
1888 if (arm_arch6k && !TARGET_THUMB1)
1889 target_thread_pointer = TP_CP15;
1890 else
1891   target_thread_pointer = TP_SOFT;
1894 if (TARGET_HARD_TP && TARGET_THUMB1)
1895 error ("cannot use -mtp=cp15 with 16-bit Thumb");
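/* Editor's note: on a TP_CP15 target the thread pointer is read with the
   coprocessor instruction

       mrc p15, 0, r0, c13, c0, 3   @ user read-only thread ID register

   which only has a 32-bit encoding, hence the rejection under 16-bit
   Thumb; TP_SOFT instead calls the __aeabi_read_tp helper.  */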
1897 /* Override the default structure alignment for AAPCS ABI. */
1898 if (TARGET_AAPCS_BASED)
1899 arm_structure_size_boundary = 8;
1901 if (structure_size_string != NULL)
1903 int size = strtol (structure_size_string, NULL, 0);
1905 if (size == 8 || size == 32
1906 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1907 arm_structure_size_boundary = size;
1908 else
1909   warning (0, "structure size boundary can only be set to %s",
1910            ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
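/* Editor's example: -mstructure-size-boundary=32 rounds every structure
   size up to a multiple of 32 bits, so

       struct s { char c; };        /* sizeof (struct s) == 4, not 1 */

   whereas the AAPCS boundary of 8 set above leaves sizeof at 1.  */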
1913 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1915 error ("RTP PIC is incompatible with Thumb");
1916 flag_pic = 0;
1919 /* If stack checking is disabled, we can use r10 as the PIC register,
1920 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1921 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1923 if (TARGET_VXWORKS_RTP)
1924 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1925 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1928 if (flag_pic && TARGET_VXWORKS_RTP)
1929 arm_pic_register = 9;
1931 if (arm_pic_register_string != NULL)
1933 int pic_register = decode_reg_name (arm_pic_register_string);
1935 if (!flag_pic)
1936   warning (0, "-mpic-register= is useless without -fpic");
1938 /* Prevent the user from choosing an obviously stupid PIC register. */
1939 else if (pic_register < 0 || call_used_regs[pic_register]
1940 || pic_register == HARD_FRAME_POINTER_REGNUM
1941 || pic_register == STACK_POINTER_REGNUM
1942 || pic_register >= PC_REGNUM
1943 || (TARGET_VXWORKS_RTP
1944 && (unsigned int) pic_register != arm_pic_register))
1945 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1946 else
1947   arm_pic_register = pic_register;
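/* Editor's example: -fpic -mpic-register=r5 is accepted because r5 is a
   call-saved general register, while -mpic-register=r0 is rejected by the
   call_used_regs test above (r0 is call-clobbered).  */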
1950 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1951 if (fix_cm3_ldrd == 2)
1953 if (arm_selected_cpu->core == cortexm3)
1954   fix_cm3_ldrd = 1;
1955 else
1956   fix_cm3_ldrd = 0;
1959 if (TARGET_THUMB1 && flag_schedule_insns)
1961 /* Don't warn since it's on by default in -O2. */
1962 flag_schedule_insns = 0;
1965 if (optimize_size)
1966   {
1967     /* If optimizing for size, bump the number of instructions that we
1968 are prepared to conditionally execute (even on a StrongARM). */
1969 max_insns_skipped = 6;
1970   }
1971 else
1972   {
1973     /* StrongARM has early execution of branches, so a sequence
1974 that is worth skipping is shorter. */
1975 if (arm_tune_strongarm)
1976 max_insns_skipped = 3;
1979 /* Hot/Cold partitioning is not currently supported, since we can't
1980 handle literal pool placement in that case. */
1981 if (flag_reorder_blocks_and_partition)
1983 inform (input_location,
1984 "-freorder-blocks-and-partition not supported on this architecture");
1985 flag_reorder_blocks_and_partition = 0;
1986 flag_reorder_blocks = 1;
1990 /* Hoisting PIC address calculations more aggressively provides a small,
1991 but measurable, size reduction for PIC code. Therefore, we decrease
1992 the bar for unrestricted expression hoisting to the cost of PIC address
1993 calculation, which is 2 instructions. */
1994 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1995 global_options.x_param_values,
1996 global_options_set.x_param_values);
1998 /* ARM EABI defaults to strict volatile bitfields. */
1999 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2000 flag_strict_volatile_bitfields = 1;
2002 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we
2003    have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2004 if (flag_prefetch_loop_arrays < 0
2005     && HAVE_prefetch
2006     && optimize >= 3
2007     && current_tune->num_prefetch_slots > 0)
2008 flag_prefetch_loop_arrays = 1;
2010 /* Set up parameters to be used in prefetching algorithm. Do not override the
2011 defaults unless we are tuning for a core we have researched values for. */
2012 if (current_tune->num_prefetch_slots > 0)
2013 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2014 current_tune->num_prefetch_slots,
2015 global_options.x_param_values,
2016 global_options_set.x_param_values);
2017 if (current_tune->l1_cache_line_size >= 0)
2018 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2019 current_tune->l1_cache_line_size,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2022 if (current_tune->l1_cache_size >= 0)
2023 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2024 current_tune->l1_cache_size,
2025 global_options.x_param_values,
2026 global_options_set.x_param_values);
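/* Editor's note: the three calls above have the same effect as the user
   passing, e.g., --param simultaneous-prefetches=2 --param
   l1-cache-line-size=64, except that maybe_set_param_value never
   overrides a --param given explicitly on the command line.  */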
2028 /* Register global variables with the garbage collector. */
2029 arm_add_gc_roots ();
2033 arm_add_gc_roots (void)
2035 gcc_obstack_init(&minipool_obstack);
2036 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2039 /* A table of known ARM exception types.
2040 For use with the interrupt function attribute. */
2042 typedef struct
2043 {
2044   const char *const arg;
2045   const unsigned long return_value;
2046 }
2047 isr_attribute_arg;
2049 static const isr_attribute_arg isr_attribute_args [] =
2051 { "IRQ", ARM_FT_ISR },
2052 { "irq", ARM_FT_ISR },
2053 { "FIQ", ARM_FT_FIQ },
2054 { "fiq", ARM_FT_FIQ },
2055 { "ABORT", ARM_FT_ISR },
2056 { "abort", ARM_FT_ISR },
2057 { "ABORT", ARM_FT_ISR },
2058 { "abort", ARM_FT_ISR },
2059 { "UNDEF", ARM_FT_EXCEPTION },
2060 { "undef", ARM_FT_EXCEPTION },
2061 { "SWI", ARM_FT_EXCEPTION },
2062 { "swi", ARM_FT_EXCEPTION },
2063 { NULL, ARM_FT_NORMAL }
2064 };
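/* Usage example (editor's note): these strings are the accepted arguments
   of the "isr"/"interrupt" function attribute, e.g.

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   An omitted argument defaults to IRQ; an unknown string yields
   ARM_FT_UNKNOWN via arm_isr_value below.  */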
2066 /* Returns the (interrupt) function type of the current
2067 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2069 static unsigned long
2070 arm_isr_value (tree argument)
2072 const isr_attribute_arg * ptr;
2073 const char *arg;
2075 if (!arm_arch_notm)
2076   return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2078 /* No argument - default to IRQ. */
2079 if (argument == NULL_TREE)
2080   return ARM_FT_ISR;
2082 /* Get the value of the argument. */
2083 if (TREE_VALUE (argument) == NULL_TREE
2084 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2085 return ARM_FT_UNKNOWN;
2087 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2089 /* Check it against the list of known arguments. */
2090 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2091 if (streq (arg, ptr->arg))
2092 return ptr->return_value;
2094 /* An unrecognized interrupt type. */
2095 return ARM_FT_UNKNOWN;
2098 /* Computes the type of the current function. */
2100 static unsigned long
2101 arm_compute_func_type (void)
2103 unsigned long type = ARM_FT_UNKNOWN;
2107 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2109 /* Decide if the current function is volatile. Such functions
2110 never return, and many memory cycles can be saved by not storing
2111 register values that will never be needed again. This optimization
2112 was added to speed up context switching in a kernel application. */
2113 if (optimize > 0
2114     && (TREE_NOTHROW (current_function_decl)
2115 || !(flag_unwind_tables
2116         || (flag_exceptions
2117             && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2118 && TREE_THIS_VOLATILE (current_function_decl))
2119 type |= ARM_FT_VOLATILE;
2121 if (cfun->static_chain_decl != NULL)
2122 type |= ARM_FT_NESTED;
2124 attr = DECL_ATTRIBUTES (current_function_decl);
2126 a = lookup_attribute ("naked", attr);
2127 if (a != NULL_TREE)
2128   type |= ARM_FT_NAKED;
2130 a = lookup_attribute ("isr", attr);
2131 if (a == NULL_TREE)
2132   a = lookup_attribute ("interrupt", attr);
2134 if (a == NULL_TREE)
2135   type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2136 else
2137   type |= arm_isr_value (TREE_VALUE (a));
2139 return type;
2142 /* Returns the type of the current function. */
2145 arm_current_func_type (void)
2147 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2148 cfun->machine->func_type = arm_compute_func_type ();
2150 return cfun->machine->func_type;
2154 arm_allocate_stack_slots_for_args (void)
2156 /* Naked functions should not allocate stack slots for arguments. */
2157 return !IS_NAKED (arm_current_func_type ());
2161 /* Output assembler code for a block containing the constant parts
2162 of a trampoline, leaving space for the variable parts.
2164 On the ARM, (if r8 is the static chain regnum, and remembering that
2165 referencing pc adds an offset of 8) the trampoline looks like:
2166    ldr    r8, [pc, #0]
2167    ldr    pc, [pc]
2168    .word  static chain value
2169 .word function's address
2170 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2173 arm_asm_trampoline_template (FILE *f)
2175 if (TARGET_ARM)
2176   {
2177     asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2178 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2180 else if (TARGET_THUMB2)
2182 /* The Thumb-2 trampoline is similar to the arm implementation.
2183 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2184 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2185 STATIC_CHAIN_REGNUM, PC_REGNUM);
2186 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2187   }
2188 else
2189   {
2190     ASM_OUTPUT_ALIGN (f, 2);
2191 fprintf (f, "\t.code\t16\n");
2192 fprintf (f, ".Ltrampoline_start:\n");
2193 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2194 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2195 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2196 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2197 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2198 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2200 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2201 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2204 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2207 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2209 rtx fnaddr, mem, a_tramp;
2211 emit_block_move (m_tramp, assemble_trampoline_template (),
2212 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2214 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2215 emit_move_insn (mem, chain_value);
2217 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2218 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2219 emit_move_insn (mem, fnaddr);
2221 a_tramp = XEXP (m_tramp, 0);
2222 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2223 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2224 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2227 /* Thumb trampolines should be entered in thumb mode, so set
2228 the bottom bit of the address. */
2231 arm_trampoline_adjust_address (rtx addr)
2233 if (TARGET_THUMB)
2234   addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2235                               NULL, 0, OPTAB_LIB_WIDEN);
2236 return addr;
2239 /* Return 1 if it is possible to return using a single instruction.
2240 If SIBLING is non-null, this is a test for a return before a sibling
2241 call. SIBLING is the call insn, so we can examine its register usage. */
2244 use_return_insn (int iscond, rtx sibling)
2247 unsigned int func_type;
2248 unsigned long saved_int_regs;
2249 unsigned HOST_WIDE_INT stack_adjust;
2250 arm_stack_offsets *offsets;
2252 /* Never use a return instruction before reload has run. */
2253 if (!reload_completed)
2254   return 0;
2256 func_type = arm_current_func_type ();
2258 /* Naked, volatile and stack alignment functions need special
2259    consideration.  */
2260 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2261   return 0;
2263 /* So do interrupt functions that use the frame pointer and Thumb
2264 interrupt functions. */
2265 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2266   return 0;
2268 offsets = arm_get_frame_offsets ();
2269 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2271 /* As do variadic functions. */
2272 if (crtl->args.pretend_args_size
2273 || cfun->machine->uses_anonymous_args
2274 /* Or if the function calls __builtin_eh_return () */
2275 || crtl->calls_eh_return
2276 /* Or if the function calls alloca */
2277 || cfun->calls_alloca
2278 /* Or if there is a stack adjustment. However, if the stack pointer
2279 is saved on the stack, we can use a pre-incrementing stack load. */
2280 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2281                            && stack_adjust == 4)))
2282   return 0;
2284 saved_int_regs = offsets->saved_regs_mask;
2286 /* Unfortunately, the insn
2288 ldmib sp, {..., sp, ...}
2290 triggers a bug on most SA-110 based devices, such that the stack
2291 pointer won't be correctly restored if the instruction takes a
2292 page fault. We work around this problem by popping r3 along with
2293 the other registers, since that is never slower than executing
2294 another instruction.
2296 We test for !arm_arch5 here, because code for any architecture
2297    less than this could potentially be run on one of the buggy
2298    chips.  */
2299 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2301 /* Validate that r3 is a call-clobbered register (always true in
2302 the default abi) ... */
2303 if (!call_used_regs[3])
2304   return 0;
2306 /* ... that it isn't being used for a return value ... */
2307 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2308   return 0;
2310 /* ... or for a tail-call argument ... */
2311 if (sibling)
2312   {
2313     gcc_assert (GET_CODE (sibling) == CALL_INSN);
2315     if (find_regno_fusage (sibling, USE, 3))
2316       return 0;
2317   }
2319 /* ... and that there are no call-saved registers in r0-r2
2320 (always true in the default ABI). */
2321 if (saved_int_regs & 0x7)
2322   return 0;
2325 /* Can't be done if interworking with Thumb, and any registers have been
2326    stacked.  */
2327 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
2328   return 0;
2330 /* On StrongARM, conditional returns are expensive if they aren't
2331 taken and multiple registers have been stacked. */
2332 if (iscond && arm_tune_strongarm)
2334 /* Conditional return when just the LR is stored is a simple
2335 conditional-load instruction, that's not expensive. */
2336 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2337   return 0;
2339 if (flag_pic
2340     && arm_pic_register != INVALID_REGNUM
2341     && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2342   return 0;
2345 /* If there are saved registers but the LR isn't saved, then we need
2346 two instructions for the return. */
2347 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2348   return 0;
2350 /* Can't be done if any of the FPA regs are pushed,
2351 since this also requires an insn. */
2352 if (TARGET_HARD_FLOAT && TARGET_FPA)
2353 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2354   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2355     return 0;
2357 /* Likewise VFP regs. */
2358 if (TARGET_HARD_FLOAT && TARGET_VFP)
2359 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2360   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2361     return 0;
2363 if (TARGET_REALLY_IWMMXT)
2364 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2365   if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2366     return 0;
2368 return 1;
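/* Editor's illustration: when use_return_insn returns 1 the epilogue can
   be a single instruction, e.g.

       ldmfd sp!, {r4, r5, pc}    @ saved registers plus return address
       bx    lr                   @ nothing was stacked

   Each early "return 0" above marks a case needing extra epilogue code.  */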
2371 /* Return TRUE if int I is a valid immediate ARM constant. */
2374 const_ok_for_arm (HOST_WIDE_INT i)
2378 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2379 be all zero, or all one. */
2380 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2381 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2382 != ((~(unsigned HOST_WIDE_INT) 0)
2383 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2384   return FALSE;
2386 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2388 /* Fast return for 0 and small values. We must do this for zero, since
2389 the code below can't handle that one case. */
2390 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2391   return TRUE;
2393 /* Get the number of trailing zeros. */
2394 lowbit = ffs((int) i) - 1;
2396 /* Only even shifts are allowed in ARM mode so round down to the
2397 nearest even number. */
2398 if (TARGET_ARM)
2399   lowbit &= ~1;
2401 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2402   return TRUE;
2406 /* Allow rotated constants in ARM mode. */
2407 if (TARGET_ARM && i != 0
2408     && ((i & ~0xc000003f) == 0
2409 || (i & ~0xf000000f) == 0
2410 || (i & ~0xfc000003) == 0))
2411   return TRUE;
2413 else if (TARGET_THUMB2)
2414   {
2415     HOST_WIDE_INT v;
2417     /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
2418     v = i & 0xff;
2419     v |= v << 16;
2420     if (i == v || i == (v | (v << 8)))
2421       return TRUE;
2423     /* Allow repeated pattern 0xXY00XY00.  */
2424     v = i & 0xff00;
2425     v |= v << 16;
2426     if (i == v)
2427       return TRUE;
2428   }
2430 return FALSE;
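/* Editor's examples: 0xff, 0x00ff0000 and 0xf000000f are valid ARM
   immediates (an 8-bit value rotated right by an even amount);
   0x00010001 fails the rotation test but is accepted for Thumb-2 by the
   replicated 0x00XY00XY pattern above.  0x101 is valid for neither.  */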
2433 /* Return true if I is a valid constant for the operation CODE. */
2435 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2437 if (const_ok_for_arm (i))
2438   return 1;
2461 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2463 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2469 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2473 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2480 /* Emit a sequence of insns to handle a large constant.
2481 CODE is the code of the operation required, it can be any of SET, PLUS,
2482 IOR, AND, XOR, MINUS;
2483 MODE is the mode in which the operation is being performed;
2484 VAL is the integer to operate on;
2485 SOURCE is the other operand (a register, or a null-pointer for SET);
2486 SUBTARGETS means it is safe to create scratch registers if that will
2487 either produce a simpler sequence, or we will want to cse the values.
2488 Return value is the number of insns emitted. */
2490 /* ??? Tweak this for thumb2. */
2492 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2493 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2497 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2498   cond = COND_EXEC_TEST (PATTERN (insn));
2499 else
2500   cond = NULL_RTX;
2502 if (subtargets || code == SET
2503 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2504 && REGNO (target) != REGNO (source)))
2506 /* After arm_reorg has been called, we can't fix up expensive
2507 constants by pushing them into memory so we must synthesize
2508 them in-line, regardless of the cost. This is only likely to
2509 be more costly on chips that have load delay slots and we are
2510 compiling without running the scheduler (so no splitting
2511 occurred before the final instruction emission).
2513 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2515 if (!after_arm_reorg
2516     && !cond
2517     && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2519 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2520        + (code != SET))))
2521   {
2522     if (code == SET)
2523       {
2524         /* Currently SET is the only monadic value for CODE, all
2525            the rest are dyadic.  */
2526 if (TARGET_USE_MOVT)
2527 arm_emit_movpair (target, GEN_INT (val));
2528 else
2529   emit_set_insn (target, GEN_INT (val));
2535 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2537 if (TARGET_USE_MOVT)
2538 arm_emit_movpair (temp, GEN_INT (val));
2539 else
2540   emit_set_insn (temp, GEN_INT (val));
2542 /* For MINUS, the value is subtracted from, since we never
2543 have subtraction of a constant. */
2544 if (code == MINUS)
2545   emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2546 else
2547   emit_set_insn (target,
2548 gen_rtx_fmt_ee (code, mode, source, temp));
2549 return 2;
2554 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2555                          1);
2558 /* Return the number of instructions required to synthesize the given
2559 constant, if we start emitting them from bit-position I. */
2561 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2563 HOST_WIDE_INT temp1;
2564 int step_size = TARGET_ARM ? 2 : 1;
2567 gcc_assert (TARGET_ARM || i == 0);
2575 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2580 temp1 = remainder & ((0x0ff << end)
2581 | ((i < end) ? (0xff >> (32 - end)) : 0));
2582 remainder &= ~temp1;
2587 } while (remainder);
2592 find_best_start (unsigned HOST_WIDE_INT remainder)
2594 int best_consecutive_zeros = 0;
2598 /* If we aren't targeting ARM, the best place to start is always at
2599    the bottom.  */
2600 if (!TARGET_ARM)
2601   return 0;
2603 for (i = 0; i < 32; i += 2)
2605 int consecutive_zeros = 0;
2607 if (!(remainder & (3 << i)))
2609 while ((i < 32) && !(remainder & (3 << i)))
2611 consecutive_zeros += 2;
2614 if (consecutive_zeros > best_consecutive_zeros)
2616 best_consecutive_zeros = consecutive_zeros;
2617 best_start = i - consecutive_zeros;
2623 /* So long as it won't require any more insns to do so, it's
2624 desirable to emit a small constant (in bits 0...9) in the last
2625 insn. This way there is more chance that it can be combined with
2626 a later addressing insn to form a pre-indexed load or store
2627 operation. Consider:
2629 *((volatile int *)0xe0000100) = 1;
2630 *((volatile int *)0xe0000110) = 2;
2632 We want this to wind up as:
2636 str rB, [rA, #0x100]
2638 str rB, [rA, #0x110]
2640 rather than having to synthesize both large constants from scratch.
2642 Therefore, we calculate how many insns would be required to emit
2643 the constant starting from `best_start', and also starting from
2644 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2645 yield a shorter sequence, we may as well use zero. */
2646 if (best_start != 0
2647     && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2648     && (count_insns_for_constant (remainder, 0) <=
2649         count_insns_for_constant (remainder, best_start)))
2650   best_start = 0;
2652 return best_start;
2655 /* Emit an instruction with the indicated PATTERN. If COND is
2656    non-NULL, conditionalize the execution of the instruction on COND
2657    being true.  */
2659 static void
2660 emit_constant_insn (rtx cond, rtx pattern)
2662 if (cond)
2663   pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2664 emit_insn (pattern);
2667 /* As above, but extra parameter GENERATE which, if clear, suppresses
2668    RTL generation.  */
2669 /* ??? This needs more work for thumb2.  */
2671 static int
2672 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2673 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2678 int final_invert = 0;
2679 int can_negate_initial = 0;
2681 int num_bits_set = 0;
2682 int set_sign_bit_copies = 0;
2683 int clear_sign_bit_copies = 0;
2684 int clear_zero_bit_copies = 0;
2685 int set_zero_bit_copies = 0;
2687 unsigned HOST_WIDE_INT temp1, temp2;
2688 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2689 int step_size = TARGET_ARM ? 2 : 1;
2691 /* Find out which operations are safe for a given CODE. Also do a quick
2692    check for degenerate cases; these can occur when DImode operations
2693    are split.  */
2703 can_negate_initial = 1;
2707 if (remainder == 0xffffffff)
2710 emit_constant_insn (cond,
2711 gen_rtx_SET (VOIDmode, target,
2712 GEN_INT (ARM_SIGN_EXTEND (val))));
2718 if (reload_completed && rtx_equal_p (target, source))
2722 emit_constant_insn (cond,
2723 gen_rtx_SET (VOIDmode, target, source));
2735 emit_constant_insn (cond,
2736 gen_rtx_SET (VOIDmode, target, const0_rtx));
2739 if (remainder == 0xffffffff)
2741 if (reload_completed && rtx_equal_p (target, source))
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, source));
2754 if (reload_completed && rtx_equal_p (target, source))
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, target, source));
2762 if (remainder == 0xffffffff)
2765 emit_constant_insn (cond,
2766 gen_rtx_SET (VOIDmode, target,
2767 gen_rtx_NOT (mode, source)));
2773 /* We treat MINUS as (val - source), since (source - val) is always
2774 passed as (source + (-val)). */
2778 emit_constant_insn (cond,
2779 gen_rtx_SET (VOIDmode, target,
2780 gen_rtx_NEG (mode, source)));
2783 if (const_ok_for_arm (val))
2786 emit_constant_insn (cond,
2787 gen_rtx_SET (VOIDmode, target,
2788 gen_rtx_MINUS (mode, GEN_INT (val),
2800 /* If we can do it in one insn get out quickly. */
2801 if (const_ok_for_arm (val)
2802 || (can_negate_initial && const_ok_for_arm (-val))
2803 || (can_invert && const_ok_for_arm (~val)))
2806 emit_constant_insn (cond,
2807 gen_rtx_SET (VOIDmode, target,
2809 ? gen_rtx_fmt_ee (code, mode, source,
2815 /* Calculate a few attributes that may be useful for specific
2816    optimizations.  */
2817 /* Count number of leading zeros. */
2818 for (i = 31; i >= 0; i--)
2820 if ((remainder & (1 << i)) == 0)
2821   clear_sign_bit_copies++;
2822 else
2823   break;
2826 /* Count number of leading 1's. */
2827 for (i = 31; i >= 0; i--)
2829 if ((remainder & (1 << i)) != 0)
2830   set_sign_bit_copies++;
2831 else
2832   break;
2835 /* Count number of trailing zeros.  */
2836 for (i = 0; i <= 31; i++)
2838 if ((remainder & (1 << i)) == 0)
2839   clear_zero_bit_copies++;
2840 else
2841   break;
2844 /* Count number of trailing 1's. */
2845 for (i = 0; i <= 31; i++)
2847 if ((remainder & (1 << i)) != 0)
2848   set_zero_bit_copies++;
2849 else
2850   break;
2856 /* See if we can use movw. */
2857 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2859 if (generate)
2860   emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2861                                          GEN_INT (val)));
2862 return 1;
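/* Editor's note: e.g. val == 0x1234 becomes the single instruction

       movw r0, #0x1234

   on ARMv6T2 and later (arm_arch_thumb2), where a mov/orr pair or a
   literal-pool load would otherwise be needed.  */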
2865 /* See if we can do this by sign_extending a constant that is known
2866    to be negative.  This is a good way of doing it, since the shift
2867 may well merge into a subsequent insn. */
2868 if (set_sign_bit_copies > 1)
2870 if (const_ok_for_arm
2871 (temp1 = ARM_SIGN_EXTEND (remainder
2872 << (set_sign_bit_copies - 1))))
2876 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2877 emit_constant_insn (cond,
2878 gen_rtx_SET (VOIDmode, new_src,
2880 emit_constant_insn (cond,
2881 gen_ashrsi3 (target, new_src,
2882 GEN_INT (set_sign_bit_copies - 1)));
2886 /* For an inverted constant, we will need to set the low bits,
2887 these will be shifted out of harm's way. */
2888 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2889 if (const_ok_for_arm (~temp1))
2893 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2894 emit_constant_insn (cond,
2895 gen_rtx_SET (VOIDmode, new_src,
2897 emit_constant_insn (cond,
2898 gen_ashrsi3 (target, new_src,
2899 GEN_INT (set_sign_bit_copies - 1)));
2905 /* See if we can calculate the value as the difference between two
2906 valid immediates. */
2907 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2909 int topshift = clear_sign_bit_copies & ~1;
2911 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2912 & (0xff000000 >> topshift));
2914 /* If temp1 is zero, then that means the 9 most significant
2915 bits of remainder were 1 and we've caused it to overflow.
2916 When topshift is 0 we don't need to do anything since we
2917 can borrow from 'bit 32'. */
2918 if (temp1 == 0 && topshift != 0)
2919 temp1 = 0x80000000 >> (topshift - 1);
2921 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2923 if (const_ok_for_arm (temp2))
2927 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2928 emit_constant_insn (cond,
2929 gen_rtx_SET (VOIDmode, new_src,
2931 emit_constant_insn (cond,
2932 gen_addsi3 (target, new_src,
2940 /* See if we can generate this by setting the bottom (or the top)
2941 16 bits, and then shifting these into the other half of the
2942 word. We only look for the simplest cases, to do more would cost
2943 too much. Be careful, however, not to generate this when the
2944 alternative would take fewer insns. */
2945 if (val & 0xffff0000)
2947 temp1 = remainder & 0xffff0000;
2948 temp2 = remainder & 0x0000ffff;
2950 /* Overlaps outside this range are best done using other methods. */
2951 for (i = 9; i < 24; i++)
2953 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2954 && !const_ok_for_arm (temp2))
2956 rtx new_src = (subtargets
2957 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2959 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2960 source, subtargets, generate);
2968 gen_rtx_ASHIFT (mode, source,
2975 /* Don't duplicate cases already considered. */
2976 for (i = 17; i < 24; i++)
2978 if (((temp1 | (temp1 >> i)) == remainder)
2979 && !const_ok_for_arm (temp1))
2981 rtx new_src = (subtargets
2982 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2984 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2985 source, subtargets, generate);
2990 gen_rtx_SET (VOIDmode, target,
2993 gen_rtx_LSHIFTRT (mode, source,
3004 /* If we have IOR or XOR, and the constant can be loaded in a
3005 single instruction, and we can find a temporary to put it in,
3006 then this can be done in two instructions instead of 3-4. */
3008 /* TARGET can't be NULL if SUBTARGETS is 0 */
3009 || (reload_completed && !reg_mentioned_p (target, source)))
3011 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3015 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3017 emit_constant_insn (cond,
3018 gen_rtx_SET (VOIDmode, sub,
3020 emit_constant_insn (cond,
3021 gen_rtx_SET (VOIDmode, target,
3022 gen_rtx_fmt_ee (code, mode,
3033    x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3034 and the remainder 0s for e.g. 0xfff00000)
3035 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3037 This can be done in 2 instructions by using shifts with mov or mvn.
3038    e.g. for
3039        x = x | 0xfff00000;
3040    we generate
3041        mvn  r0, r0, asl #12
3042        mvn  r0, r0, lsr #12  */
3043 if (set_sign_bit_copies > 8
3044 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3048 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3049 rtx shift = GEN_INT (set_sign_bit_copies);
3053 gen_rtx_SET (VOIDmode, sub,
3055 gen_rtx_ASHIFT (mode,
3060 gen_rtx_SET (VOIDmode, target,
3062 gen_rtx_LSHIFTRT (mode, sub,
3069 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3071 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3073    E.g. for r0 = r0 | 0xfff:
3074        mvn  r0, r0, lsr #12
3075        mvn  r0, r0, asl #12
3078 if (set_zero_bit_copies > 8
3079 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3083 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3084 rtx shift = GEN_INT (set_zero_bit_copies);
3088 gen_rtx_SET (VOIDmode, sub,
3090 gen_rtx_LSHIFTRT (mode,
3095 gen_rtx_SET (VOIDmode, target,
3097 gen_rtx_ASHIFT (mode, sub,
3103 /* This will never be reached for Thumb2 because orn is a valid
3104 instruction. This is for Thumb1 and the ARM 32 bit cases.
3106 x = y | constant (such that ~constant is a valid constant)
3107    Transform this to
3108    x = ~(~y & ~constant).
3110 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3114 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3115 emit_constant_insn (cond,
3116 gen_rtx_SET (VOIDmode, sub,
3117 gen_rtx_NOT (mode, source)));
3120 sub = gen_reg_rtx (mode);
3121 emit_constant_insn (cond,
3122 gen_rtx_SET (VOIDmode, sub,
3123 gen_rtx_AND (mode, source,
3125 emit_constant_insn (cond,
3126 gen_rtx_SET (VOIDmode, target,
3127 gen_rtx_NOT (mode, sub)));
3134 /* See if two shifts will do 2 or more insns' worth of work.  */
3135 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3137 HOST_WIDE_INT shift_mask = ((0xffffffff
3138 << (32 - clear_sign_bit_copies))
3141 if ((remainder | shift_mask) != 0xffffffff)
3145 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3146 insns = arm_gen_constant (AND, mode, cond,
3147 remainder | shift_mask,
3148 new_src, source, subtargets, 1);
3153 rtx targ = subtargets ? NULL_RTX : target;
3154 insns = arm_gen_constant (AND, mode, cond,
3155 remainder | shift_mask,
3156 targ, source, subtargets, 0);
3162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3163 rtx shift = GEN_INT (clear_sign_bit_copies);
3165 emit_insn (gen_ashlsi3 (new_src, source, shift));
3166 emit_insn (gen_lshrsi3 (target, new_src, shift));
3172 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3174 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3176 if ((remainder | shift_mask) != 0xffffffff)
3180 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3182 insns = arm_gen_constant (AND, mode, cond,
3183 remainder | shift_mask,
3184 new_src, source, subtargets, 1);
3189 rtx targ = subtargets ? NULL_RTX : target;
3191 insns = arm_gen_constant (AND, mode, cond,
3192 remainder | shift_mask,
3193 targ, source, subtargets, 0);
3199 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3200 rtx shift = GEN_INT (clear_zero_bit_copies);
3202 emit_insn (gen_lshrsi3 (new_src, source, shift));
3203     emit_insn (gen_ashlsi3 (target, new_src, shift));
3204   }
3206 return insns + 2;
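/* Editor's illustration: AND with 0xffff0000 reaches the branch above
   (clear_zero_bit_copies == 16) and falls back to the two shifts

       mov r0, r0, lsr #16
       mov r0, r0, asl #16

   avoiding any need to load the unencodable mask into a scratch.  */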
3215 for (i = 0; i < 32; i++)
3216   if (remainder & (1 << i))
3217     num_bits_set++;
3219 if ((code == AND)
3220     || (code != IOR && can_invert && num_bits_set > 16))
3221 remainder ^= 0xffffffff;
3222 else if (code == PLUS && num_bits_set > 16)
3223 remainder = (-remainder) & 0xffffffff;
3225 /* For XOR, if more than half the bits are set and there's a sequence
3226 of more than 8 consecutive ones in the pattern then we can XOR by the
3227 inverted constant and then invert the final result; this may save an
3228 instruction and might also lead to the final mvn being merged with
3229 some other operation. */
3230 else if (code == XOR && num_bits_set > 16
3231 && (count_insns_for_constant (remainder ^ 0xffffffff,
3232                                      find_best_start
3233                                      (remainder ^ 0xffffffff))
3234 < count_insns_for_constant (remainder,
3235 find_best_start (remainder))))
3237   remainder ^= 0xffffffff;
3238   final_invert = 1;
3246 /* Now try and find a way of doing the job in either two or three
3247    instructions.
3248 We start by looking for the largest block of zeros that are aligned on
3249 a 2-bit boundary, we then fill up the temps, wrapping around to the
3250 top of the word when we drop off the bottom.
3251 In the worst case this code should produce no more than four insns.
3252 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3253 best place to start. */
3255 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3256    the same.  */
3258 /* Now start emitting the insns. */
3259 i = find_best_start (remainder);
3266 if (remainder & (3 << (i - 2)))
3271 temp1 = remainder & ((0x0ff << end)
3272 | ((i < end) ? (0xff >> (32 - end)) : 0));
3273 remainder &= ~temp1;
3277 rtx new_src, temp1_rtx;
3279 if (code == SET || code == MINUS)
3281 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3282 if (can_invert && code != MINUS)
3287 if ((final_invert || remainder) && subtargets)
3288 new_src = gen_reg_rtx (mode);
3293 else if (can_negate)
3297 temp1 = trunc_int_for_mode (temp1, mode);
3298 temp1_rtx = GEN_INT (temp1);
3302 else if (code == MINUS)
3303 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3305 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3307 emit_constant_insn (cond,
3308 gen_rtx_SET (VOIDmode, new_src,
3318 else if (code == MINUS)
3324 /* Arm allows rotates by a multiple of two.  Thumb-2 allows arbitrary
3325    shifts.  */
3334 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3335 gen_rtx_NOT (mode, source)));
3342 /* Canonicalize a comparison so that we are more likely to recognize it.
3343 This can be done for a few constant compares, where we can make the
3344 immediate value easier to load. */
3346 enum rtx_code
3347 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3349 enum machine_mode mode;
3350 unsigned HOST_WIDE_INT i, maxval;
3352 mode = GET_MODE (*op0);
3353 if (mode == VOIDmode)
3354 mode = GET_MODE (*op1);
3356 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3358 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3359 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3360 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3361 for GTU/LEU in Thumb mode. */
3362 if (mode == DImode)
3363   {
3364     rtx tem;
3366     /* To keep things simple, always use the Cirrus cfcmp64 if it is
3367        available.  */
3368     if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3369       return code;
3371 if (code == GT || code == LE
3372 || (!TARGET_ARM && (code == GTU || code == LEU)))
3374 /* Missing comparison.  First try to use an available
3375    comparison.  */
3376 if (GET_CODE (*op1) == CONST_INT)
3384 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3386 *op1 = GEN_INT (i + 1);
3387 return code == GT ? GE : LT;
3392 if (i != ~((unsigned HOST_WIDE_INT) 0)
3393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3395 *op1 = GEN_INT (i + 1);
3396 return code == GTU ? GEU : LTU;
3404 /* If that did not work, reverse the condition. */
3405 tem = *op0;
3406 *op0 = *op1;
3407 *op1 = tem;
3408 return swap_condition (code);
3414 /* Comparisons smaller than DImode. Only adjust comparisons against
3415 an out-of-range constant. */
3416 if (GET_CODE (*op1) != CONST_INT
3417 || const_ok_for_arm (INTVAL (*op1))
3418     || const_ok_for_arm (- INTVAL (*op1)))
3419   return code;
3432 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3434 *op1 = GEN_INT (i + 1);
3435 return code == GT ? GE : LT;
3442 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3444 *op1 = GEN_INT (i - 1);
3445 return code == GE ? GT : LE;
3451 if (i != ~((unsigned HOST_WIDE_INT) 0)
3452 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3454 *op1 = GEN_INT (i + 1);
3455 return code == GTU ? GEU : LTU;
3462 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3464 *op1 = GEN_INT (i - 1);
3465 return code == GEU ? GTU : LEU;
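/* Editor's example: for unsigned x, the test (x > 0xffffff) uses a
   constant no ARM immediate can encode, so it is canonicalized to
   (x >= 0x1000000), whose constant is a single rotated immediate; the
   GTU comparison becomes GEU as returned above.  */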
3477 /* Define how to find the value returned by a function. */
3480 arm_function_value(const_tree type, const_tree func,
3481 bool outgoing ATTRIBUTE_UNUSED)
3483 enum machine_mode mode;
3484 int unsignedp ATTRIBUTE_UNUSED;
3485 rtx r ATTRIBUTE_UNUSED;
3487 mode = TYPE_MODE (type);
3489 if (TARGET_AAPCS_BASED)
3490 return aapcs_allocate_return_reg (mode, type, func);
3492 /* Promote integer types. */
3493 if (INTEGRAL_TYPE_P (type))
3494 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3496 /* Promotes small structs returned in a register to full-word size
3497 for big-endian AAPCS. */
3498 if (arm_return_in_msb (type))
3500 HOST_WIDE_INT size = int_size_in_bytes (type);
3501 if (size % UNITS_PER_WORD != 0)
3503 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3504 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3508 return LIBCALL_VALUE (mode);
3512 libcall_eq (const void *p1, const void *p2)
3514 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3518 libcall_hash (const void *p1)
3520 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3524 add_libcall (htab_t htab, rtx libcall)
3526 *htab_find_slot (htab, libcall, INSERT) = libcall;
3530 arm_libcall_uses_aapcs_base (const_rtx libcall)
3532 static bool init_done = false;
3533 static htab_t libcall_htab;
3535 if (!init_done)
3536   {
3537     init_done = true;
3539     libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3540                                 NULL);
3541 add_libcall (libcall_htab,
3542 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3543 add_libcall (libcall_htab,
3544 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3545 add_libcall (libcall_htab,
3546 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3547 add_libcall (libcall_htab,
3548 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3550 add_libcall (libcall_htab,
3551 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3552 add_libcall (libcall_htab,
3553 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3554 add_libcall (libcall_htab,
3555 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3556 add_libcall (libcall_htab,
3557 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3559 add_libcall (libcall_htab,
3560 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3561 add_libcall (libcall_htab,
3562 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3567 add_libcall (libcall_htab,
3568 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3569 add_libcall (libcall_htab,
3570 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3573 return libcall && htab_find (libcall_htab, libcall) != NULL;
3577 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3579 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3580 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3582 /* The following libcalls return their result in integer registers,
3583 even though they return a floating point value. */
3584 if (arm_libcall_uses_aapcs_base (libcall))
3585 return gen_rtx_REG (mode, ARG_REGISTER(1));
3589 return LIBCALL_VALUE (mode);
3592 /* Determine the amount of memory needed to store the possible return
3593 registers of an untyped call. */
3595 arm_apply_result_size (void)
3601 if (TARGET_HARD_FLOAT_ABI)
3607 if (TARGET_MAVERICK)
3610 if (TARGET_IWMMXT_ABI)
3617 /* Decide whether TYPE should be returned in memory (true)
3618 or in a register (false). FNTYPE is the type of the function making
3619    the call.  */
3620 static bool
3621 arm_return_in_memory (const_tree type, const_tree fntype)
3625 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3627 if (TARGET_AAPCS_BASED)
3629 /* Simple, non-aggregate types (i.e. not including vectors and
3630 complex) are always returned in a register (or registers).
3631 We don't care about which register here, so we can short-cut
3632 some of the detail. */
3633 if (!AGGREGATE_TYPE_P (type)
3634 && TREE_CODE (type) != VECTOR_TYPE
3635     && TREE_CODE (type) != COMPLEX_TYPE)
3636   return false;
3638 /* Any return value that is no larger than one word can be
3639    returned in r0.  */
3640 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3641   return false;
3643 /* Check any available co-processors to see if they accept the
3644 type as a register candidate (VFP, for example, can return
3645 some aggregates in consecutive registers). These aren't
3646 available if the call is variadic. */
3647 if (aapcs_select_return_coproc (type, fntype) >= 0)
3648   return false;
3650 /* Vector values should be returned using ARM registers, not
3651 memory (unless they're over 16 bytes, which will break since
3652 we only have four call-clobbered registers to play with). */
3653 if (TREE_CODE (type) == VECTOR_TYPE)
3654 return (size < 0 || size > (4 * UNITS_PER_WORD));
3656 /* The rest go in memory.  */
3657 return true;
3660 if (TREE_CODE (type) == VECTOR_TYPE)
3661 return (size < 0 || size > (4 * UNITS_PER_WORD));
3663 if (!AGGREGATE_TYPE_P (type) &&
3664 (TREE_CODE (type) != VECTOR_TYPE))
3665   /* All simple types are returned in registers.  */
3666   return false;
3668 if (arm_abi != ARM_ABI_APCS)
3670 /* ATPCS and later return aggregate types in memory only if they are
3671 larger than a word (or are variable size). */
3672 return (size < 0 || size > UNITS_PER_WORD);
3675 /* For the arm-wince targets we choose to be compatible with Microsoft's
3676 ARM and Thumb compilers, which always return aggregates in memory. */
3677 #ifndef ARM_WINCE
3678 /* All structures/unions bigger than one word are returned in memory.
3679 Also catch the case where int_size_in_bytes returns -1. In this case
3680 the aggregate is either huge or of variable size, and in either case
3681 we will want to return it via memory and not in a register. */
3682 if (size < 0 || size > UNITS_PER_WORD)
3683   return true;
3685 if (TREE_CODE (type) == RECORD_TYPE)
3689 /* For a struct the APCS says that we only return in a register
3690 if the type is 'integer like' and every addressable element
3691 has an offset of zero. For practical purposes this means
3692 that the structure can have at most one non bit-field element
3693 and that this element must be the first one in the structure. */
3695 /* Find the first field, ignoring non FIELD_DECL things which will
3696 have been created by C++. */
3697 for (field = TYPE_FIELDS (type);
3698 field && TREE_CODE (field) != FIELD_DECL;
3699 field = DECL_CHAIN (field))
3700     continue;
3702   if (field == NULL)
3703     return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3705 /* Check that the first field is valid for returning in a register. */
3707 /* ... Floats are not allowed */
3708 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3709   return true;
3711 /* ... Aggregates that are not themselves valid for returning in
3712 a register are not allowed. */
3713 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3714   return true;
3716 /* Now check the remaining fields, if any. Only bitfields are allowed,
3717 since they are not addressable. */
3718 for (field = DECL_CHAIN (field);
3719        field;
3720        field = DECL_CHAIN (field))
3722     if (TREE_CODE (field) != FIELD_DECL)
3723       continue;
3725     if (!DECL_BIT_FIELD_TYPE (field))
3726       return true;
3729   return false;
3732 if (TREE_CODE (type) == UNION_TYPE)
3736 /* Unions can be returned in registers if every element is
3737 integral, or can be returned in an integer register. */
3738 for (field = TYPE_FIELDS (type);
3739        field;
3740        field = DECL_CHAIN (field))
3742     if (TREE_CODE (field) != FIELD_DECL)
3743       continue;
3745     if (FLOAT_TYPE_P (TREE_TYPE (field)))
3746       return true;
3748     if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3749       return true;
3752   return false;
3754 #endif /* not ARM_WINCE */
3756 /* Return all other types in memory.  */
3757 return true;
3760 /* Indicate whether or not words of a double are in big-endian order. */
3763 arm_float_words_big_endian (void)
3765 if (TARGET_MAVERICK)
3766   return 0;
3768 /* For FPA, float words are always big-endian.  For VFP, float words
3769    follow the memory system mode.  */
3771 if (TARGET_FPA)
3772   return 1;
3774 if (TARGET_VFP)
3777   return (TARGET_BIG_END ? 1 : 0);
3782 const struct pcs_attribute_arg
3783 {
3784   const char *arg;
3785   enum arm_pcs value;
3786 } pcs_attribute_args[] =
3788 {"aapcs", ARM_PCS_AAPCS},
3789 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3791 /* We could recognize these, but changes would be needed elsewhere
3792 * to implement them. */
3793 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3794 {"atpcs", ARM_PCS_ATPCS},
3795 {"apcs", ARM_PCS_APCS},
3797 {NULL, ARM_PCS_UNKNOWN}
3798 };
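/* Usage example (editor's note): the "pcs" attribute selects one of the
   variants above per function type, e.g.

       double f (double) __attribute__ ((pcs ("aapcs")));

   forces the base-AAPCS (core-register) convention for f even when the
   command line selects aapcs-vfp.  */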
3801 arm_pcs_from_attribute (tree attr)
3803 const struct pcs_attribute_arg *ptr;
3806 /* Get the value of the argument. */
3807 if (TREE_VALUE (attr) == NULL_TREE
3808 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3809 return ARM_PCS_UNKNOWN;
3811 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3813 /* Check it against the list of known arguments. */
3814 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3815   if (streq (arg, ptr->arg))
3816     return ptr->value;
3818 /* An unrecognized PCS name.  */
3819 return ARM_PCS_UNKNOWN;
3822 /* Get the PCS variant to use for this call. TYPE is the function's type
3823    specification, DECL is the specific declaration.  DECL may be null if
3824 the call could be indirect or if this is a library call. */
3826 arm_get_pcs_model (const_tree type, const_tree decl)
3828 bool user_convention = false;
3829 enum arm_pcs user_pcs = arm_pcs_default;
3834 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3837 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3838 user_convention = true;
3841 if (TARGET_AAPCS_BASED)
3843 /* Detect varargs functions. These always use the base rules
3844    (no argument is ever a candidate for a co-processor
3845    register).  */
3846 bool base_rules = stdarg_p (type);
3848 if (user_convention)
3850 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3851 sorry ("non-AAPCS derived PCS variant");
3852 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3853 error ("variadic functions must use the base AAPCS variant");
3856 if (base_rules)
3857   return ARM_PCS_AAPCS;
3858 else if (user_convention)
3859   return user_pcs;
3860 else if (decl && flag_unit_at_a_time)
3862 /* Local functions never leak outside this compilation unit,
3863 so we are free to use whatever conventions are
3865 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3866 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3867 if (i && i->local)
3868   return ARM_PCS_AAPCS_LOCAL;
3871 else if (user_convention && user_pcs != arm_pcs_default)
3872 sorry ("PCS variant");
3874 /* For everything else we use the target's default. */
3875 return arm_pcs_default;
3880 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3881 const_tree fntype ATTRIBUTE_UNUSED,
3882 rtx libcall ATTRIBUTE_UNUSED,
3883 const_tree fndecl ATTRIBUTE_UNUSED)
3885 /* Record the unallocated VFP registers. */
3886 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3887 pcum->aapcs_vfp_reg_alloc = 0;
3890 /* Walk down the type tree of TYPE counting consecutive base elements.
3891 If *MODEP is VOIDmode, then set it to the first valid floating point
3892 type. If a non-floating point type is found, or if a floating point
3893 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3894 otherwise return the count in the sub-tree. */
3896 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3898 enum machine_mode mode;
3901 switch (TREE_CODE (type))
3904 mode = TYPE_MODE (type);
3905 if (mode != DFmode && mode != SFmode)
3906   return -1;
3908 if (*modep == VOIDmode)
3917 mode = TYPE_MODE (TREE_TYPE (type));
3918 if (mode != DFmode && mode != SFmode)
3919   return -1;
3921 if (*modep == VOIDmode)
3930 /* Use V2SImode and V4SImode as representatives of all 64-bit
3931 and 128-bit vector types, whether or not those modes are
3932 supported with the present options. */
3933 size = int_size_in_bytes (type);
3946 if (*modep == VOIDmode)
3949 /* Vector modes are considered to be opaque: two vectors are
3950 equivalent for the purposes of being homogeneous aggregates
3951 if they are the same size. */
3960 tree index = TYPE_DOMAIN (type);
3962 /* Can't handle incomplete types. */
3963 if (!COMPLETE_TYPE_P (type))
3964   return -1;
3966 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3967 if (count == -1
3968     || !index
3969     || !TYPE_MAX_VALUE (index)
3970 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3971 || !TYPE_MIN_VALUE (index)
3972     || !host_integerp (TYPE_MIN_VALUE (index), 1)
3973     || count < 0)
3974   return -1;
3976 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3977 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3979 /* There must be no padding. */
3980 if (!host_integerp (TYPE_SIZE (type), 1)
3981 || (tree_low_cst (TYPE_SIZE (type), 1)
3982 != count * GET_MODE_BITSIZE (*modep)))
3994 /* Can't handle incomplete types. */
3995 if (!COMPLETE_TYPE_P (type))
3996   return -1;
3998 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4000   if (TREE_CODE (field) != FIELD_DECL)
4001     continue;
4003   sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4004   if (sub_count < 0)
4005     return -1;
4006   count += sub_count;
4009 /* There must be no padding. */
4010 if (!host_integerp (TYPE_SIZE (type), 1)
4011 || (tree_low_cst (TYPE_SIZE (type), 1)
4012 != count * GET_MODE_BITSIZE (*modep)))
4019 case QUAL_UNION_TYPE:
4021 /* These aren't very interesting except in a degenerate case. */
4026 /* Can't handle incomplete types. */
4027 if (!COMPLETE_TYPE_P (type))
4028   return -1;
4030 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4032   if (TREE_CODE (field) != FIELD_DECL)
4033     continue;
4035   sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4036   if (sub_count < 0)
4037     return -1;
4038   count = count > sub_count ? count : sub_count;
4041 /* There must be no padding. */
4042 if (!host_integerp (TYPE_SIZE (type), 1)
4043 || (tree_low_cst (TYPE_SIZE (type), 1)
4044         != count * GET_MODE_BITSIZE (*modep)))
4045   return -1;
4047 return count;
4050 default:
4051   break;
4054 return -1;
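/* Editor's example: for

       struct hfa { float x, y, z; };

   the walk above yields count == 3 with *modep == SFmode: a homogeneous
   floating-point aggregate eligible for s0-s2 under the VFP PCS.  A
   struct mixing float and int members returns -1 instead.  */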
4057 /* Return true if PCS_VARIANT should use VFP registers. */
4059 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4061 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4063 static bool seen_thumb1_vfp = false;
4065 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4067 sorry ("Thumb-1 hard-float VFP ABI");
4068 /* sorry() is not immediately fatal, so only display this once. */
4069 seen_thumb1_vfp = true;
4075 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4076   return false;
4078 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4079 (TARGET_VFP_DOUBLE || !is_double));
4083 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4084 enum machine_mode mode, const_tree type,
4085 enum machine_mode *base_mode, int *count)
4087 enum machine_mode new_mode = VOIDmode;
4089 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4090 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4091 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4092   {
4093     *count = 1;
4094     new_mode = mode;
4095   }
4096 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4097   {
4098     *count = 2;
4099     new_mode = (mode == DCmode ? DFmode : SFmode);
4100   }
4101 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4103 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4105 if (ag_count > 0 && ag_count <= 4)
4114 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4115   return false;
4117 *base_mode = new_mode;
4122 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4123 enum machine_mode mode, const_tree type)
4125 int count ATTRIBUTE_UNUSED;
4126 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4128 if (!use_vfp_abi (pcs_variant, false))
4129   return false;
4130 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4135 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4138 if (!use_vfp_abi (pcum->pcs_variant, false))
4141 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4142 &pcum->aapcs_vfp_rmode,
4143 &pcum->aapcs_vfp_rcount);
4147 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4148 const_tree type ATTRIBUTE_UNUSED)
4150 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4151 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4154 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4155 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4157 pcum->aapcs_vfp_reg_alloc = mask << regno;
4158 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4161 int rcount = pcum->aapcs_vfp_rcount;
4163 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4167 /* Avoid using unsupported vector modes. */
4168 if (rmode == V2SImode)
4169   rmode = DImode;
4170 else if (rmode == V4SImode)
4177 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4178 for (i = 0; i < rcount; i++)
4180 rtx tmp = gen_rtx_REG (rmode,
4181 FIRST_VFP_REGNUM + regno + i * rshift);
4182 tmp = gen_rtx_EXPR_LIST
4184 GEN_INT (i * GET_MODE_SIZE (rmode)));
4185 XVECEXP (par, 0, i) = tmp;
4188 pcum->aapcs_reg = par;
4190 else
4191   pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4192 return true;
4195 return false;
4198 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4199 enum machine_mode mode,
4200 const_tree type ATTRIBUTE_UNUSED)
4202 if (!use_vfp_abi (pcs_variant, false))
4203   return NULL;
4205 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4208 enum machine_mode ag_mode;
4213 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4218 if (ag_mode == V2SImode)
4220 else if (ag_mode == V4SImode)
4226 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4227 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4228 for (i = 0; i < count; i++)
4230 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4231 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4232 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4233 XVECEXP (par, 0, i) = tmp;
4239 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4243 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4244 enum machine_mode mode ATTRIBUTE_UNUSED,
4245 const_tree type ATTRIBUTE_UNUSED)
4247 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4248 pcum->aapcs_vfp_reg_alloc = 0;
4252 #define AAPCS_CP(X) \
4254 aapcs_ ## X ## _cum_init, \
4255 aapcs_ ## X ## _is_call_candidate, \
4256 aapcs_ ## X ## _allocate, \
4257 aapcs_ ## X ## _is_return_candidate, \
4258 aapcs_ ## X ## _allocate_return_reg, \
4259 aapcs_ ## X ## _advance \
4262 /* Table of co-processors that can be used to pass arguments in
4263 registers. Ideally no argument should be a candidate for more than
4264 one co-processor table entry, but the table is processed in order
4265 and stops after the first match. If that entry then fails to put
4266 the argument into a co-processor register, the argument will go on the stack. */
4270 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4271 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4273 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4274 BLKmode) is a candidate for this co-processor's registers; this
4275 function should ignore any position-dependent state in
4276 CUMULATIVE_ARGS and only use call-type dependent information. */
4277 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4279 /* Return true if the argument does get a co-processor register; it
4280 should set aapcs_reg to an RTX of the register allocated as is
4281 required for a return from FUNCTION_ARG. */
4282 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4284 /* Return true if a result of mode MODE (or type TYPE if MODE is
4285 BLKmode) can be returned in this co-processor's registers. */
4286 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4288 /* Allocate and return an RTX element to hold the return type of a
4289 call; this routine must not fail and will only be called if
4290 is_return_candidate returned true with the same parameters. */
4291 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4293 /* Finish processing this argument and prepare to start processing the next one. */
4295 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4296 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4304 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4309 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4310 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4317 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4319 /* We aren't passed a decl, so we can't check that a call is local.
4320 However, it isn't clear that that would be a win anyway, since it
4321 might limit some tail-calling opportunities. */
4322 enum arm_pcs pcs_variant;
4326 const_tree fndecl = NULL_TREE;
4328 if (TREE_CODE (fntype) == FUNCTION_DECL)
4331 fntype = TREE_TYPE (fntype);
4334 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4337 pcs_variant = arm_pcs_default;
4339 if (pcs_variant != ARM_PCS_AAPCS)
4343 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4344 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4353 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4356 /* We aren't passed a decl, so we can't check that a call is local.
4357 However, it isn't clear that that would be a win anyway, since it
4358 might limit some tail-calling opportunities. */
4359 enum arm_pcs pcs_variant;
4360 int unsignedp ATTRIBUTE_UNUSED;
4364 const_tree fndecl = NULL_TREE;
4366 if (TREE_CODE (fntype) == FUNCTION_DECL)
4369 fntype = TREE_TYPE (fntype);
4372 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4375 pcs_variant = arm_pcs_default;
4377 /* Promote integer types. */
4378 if (type && INTEGRAL_TYPE_P (type))
4379 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4381 if (pcs_variant != ARM_PCS_AAPCS)
4385 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4386 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4388 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4392 /* Promotes small structs returned in a register to full-word size
4393 for big-endian AAPCS. */
4394 if (type && arm_return_in_msb (type))
4396 HOST_WIDE_INT size = int_size_in_bytes (type);
4397 if (size % UNITS_PER_WORD != 0)
4399 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4400 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4404 return gen_rtx_REG (mode, R0_REGNUM);
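/* Worked example of the rounding above (hypothetical 6-byte struct on
   a big-endian AAPCS target): size % UNITS_PER_WORD is 2, so size is
   rounded up to 8 and mode_for_size selects DImode, which places the
   struct's bytes at the most significant end of r0/r1 as
   arm_return_in_msb requires. */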
4408 aapcs_libcall_value (enum machine_mode mode)
4410 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4413 /* Lay out a function argument using the AAPCS rules. The rule
4414 numbers referred to here are those in the AAPCS. */
4416 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4417 const_tree type, bool named)
4422 /* We only need to do this once per argument. */
4423 if (pcum->aapcs_arg_processed)
4426 pcum->aapcs_arg_processed = true;
4428 /* Special case: if named is false then we are handling an incoming
4429 anonymous argument which is on the stack. */
4433 /* Is this a potential co-processor register candidate? */
4434 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4436 int slot = aapcs_select_call_coproc (pcum, mode, type);
4437 pcum->aapcs_cprc_slot = slot;
4439 /* We don't have to apply any of the rules from part B of the
4440 preparation phase; these are handled elsewhere in the compiler. */
4445 /* A co-processor register candidate goes either in its own
4446 class of registers or on the stack. */
4447 if (!pcum->aapcs_cprc_failed[slot])
4449 /* C1.cp - Try to allocate the argument to co-processor registers. */
4451 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4454 /* C2.cp - Put the argument on the stack and note that we
4455 can't assign any more candidates in this slot. We also
4456 need to note that we have allocated stack space, so that
4457 we won't later try to split a non-cprc candidate between
4458 core registers and the stack. */
4459 pcum->aapcs_cprc_failed[slot] = true;
4460 pcum->can_split = false;
4463 /* We didn't get a register, so this argument goes on the stack. */
4465 gcc_assert (pcum->can_split == false);
4470 /* C3 - For double-word aligned arguments, round the NCRN up to the
4471 next even number. */
4472 ncrn = pcum->aapcs_ncrn;
4473 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4476 nregs = ARM_NUM_REGS2 (mode, type);
4478 /* Sigh, this test should really assert that nregs > 0, but a GCC
4479 extension allows empty structs and then gives them empty size; it
4480 then allows such a structure to be passed by value. For some of
4481 the code below we have to pretend that such an argument has
4482 non-zero size so that we 'locate' it correctly either in
4483 registers or on the stack. */
4484 gcc_assert (nregs >= 0);
4486 nregs2 = nregs ? nregs : 1;
4488 /* C4 - Argument fits entirely in core registers. */
4489 if (ncrn + nregs2 <= NUM_ARG_REGS)
4491 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4492 pcum->aapcs_next_ncrn = ncrn + nregs;
4496 /* C5 - Some core registers left and there are no arguments already
4497 on the stack: split this argument between the remaining core
4498 registers and the stack. */
4499 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4501 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4502 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4503 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4507 /* C6 - NCRN is set to 4. */
4508 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4510 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
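/* Worked example of rules C3-C6 (hypothetical prototype, base AAPCS
   with soft-float): for f (int a, double b, int c), A is allocated to
   r0 by C4; B needs doubleword alignment, so C3 rounds the NCRN up
   from 1 to 2 and C4 assigns r2/r3; for C the NCRN is then 4, so C6
   applies and C7/C8 place it on the stack. */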
4514 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4515 for a call to a function whose data type is FNTYPE.
4516 For a library call, FNTYPE is NULL. */
4518 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4520 tree fndecl ATTRIBUTE_UNUSED)
4522 /* Long call handling. */
4524 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4526 pcum->pcs_variant = arm_pcs_default;
4528 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4530 if (arm_libcall_uses_aapcs_base (libname))
4531 pcum->pcs_variant = ARM_PCS_AAPCS;
4533 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4534 pcum->aapcs_reg = NULL_RTX;
4535 pcum->aapcs_partial = 0;
4536 pcum->aapcs_arg_processed = false;
4537 pcum->aapcs_cprc_slot = -1;
4538 pcum->can_split = true;
4540 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4544 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4546 pcum->aapcs_cprc_failed[i] = false;
4547 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4555 /* On the ARM, the offset starts at 0. */
4557 pcum->iwmmxt_nregs = 0;
4558 pcum->can_split = true;
4560 /* Varargs vectors are treated the same as long long.
4561 named_count avoids having to change the way arm handles 'named'. */
4562 pcum->named_count = 0;
4565 if (TARGET_REALLY_IWMMXT && fntype)
4569 for (fn_arg = TYPE_ARG_TYPES (fntype);
4571 fn_arg = TREE_CHAIN (fn_arg))
4572 pcum->named_count += 1;
4574 if (! pcum->named_count)
4575 pcum->named_count = INT_MAX;
4580 /* Return true if mode/type need doubleword alignment. */
4582 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4584 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4585 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
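/* For example, under AAPCS a long long has 64-bit alignment while
   PARM_BOUNDARY is 32, so this returns true and the argument is
   started on an even-numbered core register or a doubleword-aligned
   stack slot (see the NCRN rounding in aapcs_layout_arg above). */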
4589 /* Determine where to put an argument to a function.
4590 Value is zero to push the argument on the stack,
4591 or a hard register in which to store the argument.
4593 MODE is the argument's machine mode.
4594 TYPE is the data type of the argument (as a tree).
4595 This is null for libcalls where that information may
4597 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4598 the preceding args and about the function being called.
4599 NAMED is nonzero if this argument is a named parameter
4600 (otherwise it is an extra parameter matching an ellipsis).
4602 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4603 other arguments are passed on the stack. If (NAMED == 0) (which happens
4604 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4605 defined), say it is passed on the stack (function_prologue will
4606 indeed pass it on the stack if necessary). */
4609 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4610 const_tree type, bool named)
4614 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4615 a call insn (op3 of a call_value insn). */
4616 if (mode == VOIDmode)
4619 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4621 aapcs_layout_arg (pcum, mode, type, named);
4622 return pcum->aapcs_reg;
4625 /* Varargs vectors are treated the same as long long.
4626 named_count avoids having to change the way arm handles 'named'. */
4627 if (TARGET_IWMMXT_ABI
4628 && arm_vector_mode_supported_p (mode)
4629 && pcum->named_count > pcum->nargs + 1)
4631 if (pcum->iwmmxt_nregs <= 9)
4632 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4635 pcum->can_split = false;
4640 /* Put doubleword aligned quantities in even register pairs. */
4642 && ARM_DOUBLEWORD_ALIGN
4643 && arm_needs_doubleword_align (mode, type))
4646 /* Only allow splitting an arg between regs and memory if all preceding
4647 args were allocated to regs. For args passed by reference we only count
4648 the reference pointer. */
4649 if (pcum->can_split)
4652 nregs = ARM_NUM_REGS2 (mode, type);
4654 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4657 return gen_rtx_REG (mode, pcum->nregs);
4661 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4663 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4664 ? DOUBLEWORD_ALIGNMENT
4669 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4670 tree type, bool named)
4672 int nregs = pcum->nregs;
4674 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4676 aapcs_layout_arg (pcum, mode, type, named);
4677 return pcum->aapcs_partial;
4680 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4683 if (NUM_ARG_REGS > nregs
4684 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4686 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
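/* Worked example (hypothetical call, non-AAPCS path above): if three
   ints have consumed r0-r2 (nregs == 3) and the next argument is a
   long long needing two registers, then 4 > 3 but 4 < 3 + 2, so the
   argument is split: (4 - 3) * UNITS_PER_WORD = 4 bytes travel in r3
   and the remaining 4 bytes go on the stack. */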
4691 /* Update the data in PCUM to advance over an argument
4692 of mode MODE and data type TYPE.
4693 (TYPE is null for libcalls where that information may not be available.) */
4696 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4697 const_tree type, bool named)
4699 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4701 aapcs_layout_arg (pcum, mode, type, named);
4703 if (pcum->aapcs_cprc_slot >= 0)
4705 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4707 pcum->aapcs_cprc_slot = -1;
4710 /* Generic stuff. */
4711 pcum->aapcs_arg_processed = false;
4712 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4713 pcum->aapcs_reg = NULL_RTX;
4714 pcum->aapcs_partial = 0;
4719 if (arm_vector_mode_supported_p (mode)
4720 && pcum->named_count > pcum->nargs
4721 && TARGET_IWMMXT_ABI)
4722 pcum->iwmmxt_nregs += 1;
4724 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4728 /* Variable-sized types are passed by reference. This is a GCC
4729 extension to the ARM ABI. */
4732 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4733 enum machine_mode mode ATTRIBUTE_UNUSED,
4734 const_tree type, bool named ATTRIBUTE_UNUSED)
4736 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4739 /* Encode the current state of the #pragma [no_]long_calls. */
4742 OFF, /* No #pragma [no_]long_calls is in effect. */
4743 LONG, /* #pragma long_calls is in effect. */
4744 SHORT /* #pragma no_long_calls is in effect. */
4747 static arm_pragma_enum arm_pragma_long_calls = OFF;
4750 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4752 arm_pragma_long_calls = LONG;
4756 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4758 arm_pragma_long_calls = SHORT;
4762 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4764 arm_pragma_long_calls = OFF;
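/* Illustrative user-level usage of these pragmas (hypothetical
   declarations):

     #pragma long_calls
     void far_away (void);       - given the long_call attribute
     #pragma no_long_calls
     void near_by (void);        - given the short_call attribute
     #pragma long_calls_off
     void ordinary (void);       - no implicit call attribute

   The attributes themselves are attached by
   arm_set_default_type_attributes below. */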
4767 /* Handle an attribute requiring a FUNCTION_DECL;
4768 arguments as in struct attribute_spec.handler. */
4770 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4771 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4773 if (TREE_CODE (*node) != FUNCTION_DECL)
4775 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4777 *no_add_attrs = true;
4783 /* Handle an "interrupt" or "isr" attribute;
4784 arguments as in struct attribute_spec.handler. */
4786 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4791 if (TREE_CODE (*node) != FUNCTION_DECL)
4793 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4795 *no_add_attrs = true;
4797 /* FIXME: the argument, if any, is checked for type attributes;
4798 should it be checked for decl ones? */
4802 if (TREE_CODE (*node) == FUNCTION_TYPE
4803 || TREE_CODE (*node) == METHOD_TYPE)
4805 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4807 warning (OPT_Wattributes, "%qE attribute ignored",
4809 *no_add_attrs = true;
4812 else if (TREE_CODE (*node) == POINTER_TYPE
4813 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4814 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4815 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4817 *node = build_variant_type_copy (*node);
4818 TREE_TYPE (*node) = build_type_attribute_variant
4820 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4821 *no_add_attrs = true;
4825 /* Possibly pass this attribute on from the type to a decl. */
4826 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4827 | (int) ATTR_FLAG_FUNCTION_NEXT
4828 | (int) ATTR_FLAG_ARRAY_NEXT))
4830 *no_add_attrs = true;
4831 return tree_cons (name, args, NULL_TREE);
4835 warning (OPT_Wattributes, "%qE attribute ignored",
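/* Illustrative usage of the attribute handled above (hypothetical
   handler):

     void irq_dispatch (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is decoded by arm_isr_value; an unrecognized
   value takes the ARM_FT_UNKNOWN paths above and the attribute is
   ignored with a warning. */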
4844 /* Handle a "pcs" attribute; arguments as in struct
4845 attribute_spec.handler. */
4847 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4848 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4850 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4852 warning (OPT_Wattributes, "%qE attribute ignored", name);
4853 *no_add_attrs = true;
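/* Illustrative usage (hypothetical prototype): selecting the VFP
   variant of the AAPCS for a single function:

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute accepts "aapcs" and "aapcs-vfp"; any other
   string reaches the warning above. */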
4858 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4859 /* Handle the "notshared" attribute. This attribute is another way of
4860 requesting hidden visibility. ARM's compiler supports
4861 "__declspec(notshared)"; we support the same thing via an
4865 arm_handle_notshared_attribute (tree *node,
4866 tree name ATTRIBUTE_UNUSED,
4867 tree args ATTRIBUTE_UNUSED,
4868 int flags ATTRIBUTE_UNUSED,
4871 tree decl = TYPE_NAME (*node);
4875 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4876 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4877 *no_add_attrs = false;
4883 /* Return 0 if the attributes for two types are incompatible, 1 if they
4884 are compatible, and 2 if they are nearly compatible (which causes a
4885 warning to be generated). */
4887 arm_comp_type_attributes (const_tree type1, const_tree type2)
4891 /* Check for mismatch of non-default calling convention. */
4892 if (TREE_CODE (type1) != FUNCTION_TYPE)
4895 /* Check for mismatched call attributes. */
4896 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4897 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4898 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4899 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4901 /* Only bother to check if an attribute is defined. */
4902 if (l1 | l2 | s1 | s2)
4904 /* If one type has an attribute, the other must have the same attribute. */
4905 if ((l1 != l2) || (s1 != s2))
4908 /* Disallow mixed attributes. */
4909 if ((l1 & s2) || (l2 & s1))
4913 /* Check for mismatched ISR attribute. */
4914 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4916 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4917 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4919 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4926 /* Assigns default attributes to a newly defined type. This is used to
4927 set short_call/long_call attributes for function types of
4928 functions defined inside corresponding #pragma scopes. */
4930 arm_set_default_type_attributes (tree type)
4932 /* Add __attribute__ ((long_call)) to all functions when
4933 inside #pragma long_calls, or __attribute__ ((short_call))
4934 when inside #pragma no_long_calls. */
4935 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4937 tree type_attr_list, attr_name;
4938 type_attr_list = TYPE_ATTRIBUTES (type);
4940 if (arm_pragma_long_calls == LONG)
4941 attr_name = get_identifier ("long_call");
4942 else if (arm_pragma_long_calls == SHORT)
4943 attr_name = get_identifier ("short_call");
4947 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4948 TYPE_ATTRIBUTES (type) = type_attr_list;
4952 /* Return true if DECL is known to be linked into section SECTION. */
4955 arm_function_in_section_p (tree decl, section *section)
4957 /* We can only be certain about functions defined in the same
4958 compilation unit. */
4959 if (!TREE_STATIC (decl))
4962 /* Make sure that SYMBOL always binds to the definition in this
4963 compilation unit. */
4964 if (!targetm.binds_local_p (decl))
4967 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4968 if (!DECL_SECTION_NAME (decl))
4970 /* Make sure that we will not create a unique section for DECL. */
4971 if (flag_function_sections || DECL_ONE_ONLY (decl))
4975 return function_section (decl) == section;
4978 /* Return nonzero if a 32-bit "long_call" should be generated for
4979 a call from the current function to DECL. We generate a long_call
4982 a. has an __attribute__ ((long_call))
4983 or b. is within the scope of a #pragma long_calls
4984 or c. the -mlong-calls command line switch has been specified
4986 However we do not generate a long call if the function:
4988 d. has an __attribute__ ((short_call))
4989 or e. is inside the scope of a #pragma no_long_calls
4990 or f. is defined in the same section as the current function. */
4993 arm_is_long_call_p (tree decl)
4998 return TARGET_LONG_CALLS;
5000 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5001 if (lookup_attribute ("short_call", attrs))
5004 /* For "f", be conservative, and only cater for cases in which the
5005 whole of the current function is placed in the same section. */
5006 if (!flag_reorder_blocks_and_partition
5007 && TREE_CODE (decl) == FUNCTION_DECL
5008 && arm_function_in_section_p (decl, current_function_section ()))
5011 if (lookup_attribute ("long_call", attrs))
5014 return TARGET_LONG_CALLS;
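/* Illustrative usage (hypothetical declaration): forcing the 32-bit
   call sequence for a routine that may sit out of BL range, e.g. code
   placed in a distant flash section:

     extern void flash_service (void) __attribute__ ((long_call));

   Conversely __attribute__ ((short_call)) pins the plain BL sequence
   even under -mlong-calls. */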
5017 /* Return nonzero if it is ok to make a tail-call to DECL. */
5019 arm_function_ok_for_sibcall (tree decl, tree exp)
5021 unsigned long func_type;
5023 if (cfun->machine->sibcall_blocked)
5026 /* Never tailcall something for which we have no decl, or if we
5027 are generating code for Thumb-1. */
5028 if (decl == NULL || TARGET_THUMB1)
5031 /* The PIC register is live on entry to VxWorks PLT entries, so we
5032 must make the call before restoring the PIC register. */
5033 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5036 /* Cannot tail-call to long calls, since these are out of range of
5037 a branch instruction. */
5038 if (arm_is_long_call_p (decl))
5041 /* If we are interworking and the function is not declared static
5042 then we can't tail-call it unless we know that it exists in this
5043 compilation unit (since it might be a Thumb routine). */
5044 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5047 func_type = arm_current_func_type ();
5048 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5049 if (IS_INTERRUPT (func_type))
5052 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5054 /* Check that the return value locations are the same. For
5055 example that we aren't returning a value from the sibling in
5056 a VFP register but then need to transfer it to a core
5060 a = arm_function_value (TREE_TYPE (exp), decl, false);
5061 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5063 if (!rtx_equal_p (a, b))
5067 /* Never tailcall if function may be called with a misaligned SP. */
5068 if (IS_STACKALIGN (func_type))
5071 /* Everything else is ok. */
5076 /* Addressing mode support functions. */
5078 /* Return nonzero if X is a legitimate immediate operand when compiling
5079 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5081 legitimate_pic_operand_p (rtx x)
5083 if (GET_CODE (x) == SYMBOL_REF
5084 || (GET_CODE (x) == CONST
5085 && GET_CODE (XEXP (x, 0)) == PLUS
5086 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5092 /* Record that the current function needs a PIC register. Initialize
5093 cfun->machine->pic_reg if we have not already done so. */
5096 require_pic_register (void)
5098 /* A lot of the logic here is made obscure by the fact that this
5099 routine gets called as part of the rtx cost estimation process.
5100 We don't want those calls to affect any assumptions about the real
5101 function; and further, we can't call entry_of_function() until we
5102 start the real expansion process. */
5103 if (!crtl->uses_pic_offset_table)
5105 gcc_assert (can_create_pseudo_p ());
5106 if (arm_pic_register != INVALID_REGNUM)
5108 if (!cfun->machine->pic_reg)
5109 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5111 /* Play games to avoid marking the function as needing pic
5112 if we are being called as part of the cost-estimation
5114 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5115 crtl->uses_pic_offset_table = 1;
5121 if (!cfun->machine->pic_reg)
5122 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5124 /* Play games to avoid marking the function as needing pic
5125 if we are being called as part of the cost-estimation
5127 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5129 crtl->uses_pic_offset_table = 1;
5132 arm_load_pic_register (0UL);
5136 /* We can be called during expansion of PHI nodes, where
5137 we can't yet emit instructions directly in the final
5138 insn stream. Queue the insns on the entry edge, they will
5139 be committed after everything else is expanded. */
5140 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5147 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5149 if (GET_CODE (orig) == SYMBOL_REF
5150 || GET_CODE (orig) == LABEL_REF)
5156 gcc_assert (can_create_pseudo_p ());
5157 reg = gen_reg_rtx (Pmode);
5160 /* VxWorks does not impose a fixed gap between segments; the run-time
5161 gap can be different from the object-file gap. We therefore can't
5162 use GOTOFF unless we are absolutely sure that the symbol is in the
5163 same segment as the GOT. Unfortunately, the flexibility of linker
5164 scripts means that we can't be sure of that in general, so assume
5165 that GOTOFF is never valid on VxWorks. */
5166 if ((GET_CODE (orig) == LABEL_REF
5167 || (GET_CODE (orig) == SYMBOL_REF
5168 && SYMBOL_REF_LOCAL_P (orig)))
5170 && !TARGET_VXWORKS_RTP)
5171 insn = arm_pic_static_addr (orig, reg);
5177 /* If this function doesn't have a pic register, create one now. */
5178 require_pic_register ();
5180 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5182 /* Make the MEM as close to a constant as possible. */
5183 mem = SET_SRC (pat);
5184 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5185 MEM_READONLY_P (mem) = 1;
5186 MEM_NOTRAP_P (mem) = 1;
5188 insn = emit_insn (pat);
5191 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5193 set_unique_reg_note (insn, REG_EQUAL, orig);
5197 else if (GET_CODE (orig) == CONST)
5201 if (GET_CODE (XEXP (orig, 0)) == PLUS
5202 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5205 /* Handle the case where we have: const (UNSPEC_TLS). */
5206 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5207 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5210 /* Handle the case where we have:
5211 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5213 if (GET_CODE (XEXP (orig, 0)) == PLUS
5214 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5215 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5217 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5223 gcc_assert (can_create_pseudo_p ());
5224 reg = gen_reg_rtx (Pmode);
5227 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5229 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5230 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5231 base == reg ? 0 : reg);
5233 if (GET_CODE (offset) == CONST_INT)
5235 /* The base register doesn't really matter; we only want to
5236 test the index for the appropriate mode. */
5237 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5239 gcc_assert (can_create_pseudo_p ());
5240 offset = force_reg (Pmode, offset);
5243 if (GET_CODE (offset) == CONST_INT)
5244 return plus_constant (base, INTVAL (offset));
5247 if (GET_MODE_SIZE (mode) > 4
5248 && (GET_MODE_CLASS (mode) == MODE_INT
5249 || TARGET_SOFT_FLOAT))
5251 emit_insn (gen_addsi3 (reg, base, offset));
5255 return gen_rtx_PLUS (Pmode, base, offset);
5262 /* Find a spare register to use during the prolog of a function. */
5265 thumb_find_work_register (unsigned long pushed_regs_mask)
5269 /* Check the argument registers first as these are call-used. The
5270 register allocation order means that sometimes r3 might be used
5271 but earlier argument registers might not, so check them all. */
5272 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5273 if (!df_regs_ever_live_p (reg))
5276 /* Before going on to check the call-saved registers we can try a couple
5277 more ways of deducing that r3 is available. The first is when we are
5278 pushing anonymous arguments onto the stack and we have less than 4
5279 registers worth of fixed arguments(*). In this case r3 will be part of
5280 the variable argument list and so we can be sure that it will be
5281 pushed right at the start of the function. Hence it will be available
5282 for the rest of the prologue.
5283 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5284 if (cfun->machine->uses_anonymous_args
5285 && crtl->args.pretend_args_size > 0)
5286 return LAST_ARG_REGNUM;
5288 /* The other case is when we have fixed arguments but less than 4 registers
5289 worth. In this case r3 might be used in the body of the function, but
5290 it is not being used to convey an argument into the function. In theory
5291 we could just check crtl->args.size to see how many bytes are
5292 being passed in argument registers, but it seems that it is unreliable.
5293 Sometimes it will have the value 0 when in fact arguments are being
5294 passed. (See testcase execute/20021111-1.c for an example). So we also
5295 check the args_info.nregs field as well. The problem with this field is
5296 that it makes no allowances for arguments that are passed to the
5297 function but which are not used. Hence we could miss an opportunity
5298 when a function has an unused argument in r3. But it is better to be
5299 safe than to be sorry. */
5300 if (! cfun->machine->uses_anonymous_args
5301 && crtl->args.size >= 0
5302 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5303 && crtl->args.info.nregs < 4)
5304 return LAST_ARG_REGNUM;
5306 /* Otherwise look for a call-saved register that is going to be pushed. */
5307 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5308 if (pushed_regs_mask & (1 << reg))
5313 /* Thumb-2 can use high regs. */
5314 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5315 if (pushed_regs_mask & (1 << reg))
5318 /* Something went wrong - thumb_compute_save_reg_mask()
5319 should have arranged for a suitable register to be pushed. */
5323 static GTY(()) int pic_labelno;
5325 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5329 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5331 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5333 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5336 gcc_assert (flag_pic);
5338 pic_reg = cfun->machine->pic_reg;
5339 if (TARGET_VXWORKS_RTP)
5341 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5342 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5343 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5345 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5347 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5348 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5352 /* We use an UNSPEC rather than a LABEL_REF because this label
5353 never appears in the code stream. */
5355 labelno = GEN_INT (pic_labelno++);
5356 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5357 l1 = gen_rtx_CONST (VOIDmode, l1);
5359 /* On the ARM the PC register contains 'dot + 8' at the time of the
5360 addition, on the Thumb it is 'dot + 4'. */
5361 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5362 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5364 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5368 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5370 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5372 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5374 else /* TARGET_THUMB1 */
5376 if (arm_pic_register != INVALID_REGNUM
5377 && REGNO (pic_reg) > LAST_LO_REGNUM)
5379 /* We will have pushed the pic register, so we should always be
5380 able to find a work register. */
5381 pic_tmp = gen_rtx_REG (SImode,
5382 thumb_find_work_register (saved_regs));
5383 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5384 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5387 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5388 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5392 /* Need to emit this whether or not we obey regdecls,
5393 since setjmp/longjmp can cause life info to screw up. */
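/* A sketch of the sequence emitted above for ARM state (register
   choice hypothetical):

     ldr     r4, .LPIC_OFF
   .LPIC0:
     add     r4, pc, r4

   where .LPIC_OFF holds the GOT base minus (.LPIC0 + 8); reading the
   PC in the add yields .LPIC0 + 8, which is why the constant folded
   into the UNSPEC is 'dot + 8' for ARM and 'dot + 4' for Thumb. */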
5397 /* Generate code to load the address of a static var when flag_pic is set. */
5399 arm_pic_static_addr (rtx orig, rtx reg)
5401 rtx l1, labelno, offset_rtx, insn;
5403 gcc_assert (flag_pic);
5405 /* We use an UNSPEC rather than a LABEL_REF because this label
5406 never appears in the code stream. */
5407 labelno = GEN_INT (pic_labelno++);
5408 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5409 l1 = gen_rtx_CONST (VOIDmode, l1);
5411 /* On the ARM the PC register contains 'dot + 8' at the time of the
5412 addition, on the Thumb it is 'dot + 4'. */
5413 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5414 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5415 UNSPEC_SYMBOL_OFFSET);
5416 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5420 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5422 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5424 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5426 else /* TARGET_THUMB1 */
5428 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5429 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5435 /* Return nonzero if X is valid as an ARM state addressing register. */
5437 arm_address_register_rtx_p (rtx x, int strict_p)
5441 if (GET_CODE (x) != REG)
5447 return ARM_REGNO_OK_FOR_BASE_P (regno);
5449 return (regno <= LAST_ARM_REGNUM
5450 || regno >= FIRST_PSEUDO_REGISTER
5451 || regno == FRAME_POINTER_REGNUM
5452 || regno == ARG_POINTER_REGNUM);
5455 /* Return TRUE if this rtx is the difference of a symbol and a label,
5456 and will reduce to a PC-relative relocation in the object file.
5457 Expressions like this can be left alone when generating PIC, rather
5458 than forced through the GOT. */
5460 pcrel_constant_p (rtx x)
5462 if (GET_CODE (x) == MINUS)
5463 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5468 /* Return true if X will surely end up in an index register after next
5471 will_be_in_index_register (const_rtx x)
5473 /* arm.md: calculate_pic_address will split this into a register. */
5474 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5477 /* Return nonzero if X is a valid ARM state address operand. */
5479 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5483 enum rtx_code code = GET_CODE (x);
5485 if (arm_address_register_rtx_p (x, strict_p))
5488 use_ldrd = (TARGET_LDRD
5490 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5492 if (code == POST_INC || code == PRE_DEC
5493 || ((code == PRE_INC || code == POST_DEC)
5494 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5495 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5497 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5498 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5499 && GET_CODE (XEXP (x, 1)) == PLUS
5500 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5502 rtx addend = XEXP (XEXP (x, 1), 1);
5504 /* Don't allow ldrd post increment by register because it's hard
5505 to fixup invalid register choices. */
5507 && GET_CODE (x) == POST_MODIFY
5508 && GET_CODE (addend) == REG)
5511 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5512 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5515 /* After reload constants split into minipools will have addresses
5516 from a LABEL_REF. */
5517 else if (reload_completed
5518 && (code == LABEL_REF
5520 && GET_CODE (XEXP (x, 0)) == PLUS
5521 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5522 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5525 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5528 else if (code == PLUS)
5530 rtx xop0 = XEXP (x, 0);
5531 rtx xop1 = XEXP (x, 1);
5533 return ((arm_address_register_rtx_p (xop0, strict_p)
5534 && ((GET_CODE (xop1) == CONST_INT
5535 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5536 || (!strict_p && will_be_in_index_register (xop1))))
5537 || (arm_address_register_rtx_p (xop1, strict_p)
5538 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5542 /* Reload currently can't handle MINUS, so disable this for now */
5543 else if (GET_CODE (x) == MINUS)
5545 rtx xop0 = XEXP (x, 0);
5546 rtx xop1 = XEXP (x, 1);
5548 return (arm_address_register_rtx_p (xop0, strict_p)
5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5553 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5554 && code == SYMBOL_REF
5555 && CONSTANT_POOL_ADDRESS_P (x)
5557 && symbol_mentioned_p (get_pool_constant (x))
5558 && ! pcrel_constant_p (get_pool_constant (x))))
5564 /* Return nonzero if X is a valid Thumb-2 address operand. */
5566 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5569 enum rtx_code code = GET_CODE (x);
5571 if (arm_address_register_rtx_p (x, strict_p))
5574 use_ldrd = (TARGET_LDRD
5576 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5578 if (code == POST_INC || code == PRE_DEC
5579 || ((code == PRE_INC || code == POST_DEC)
5580 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5581 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5583 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5584 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5585 && GET_CODE (XEXP (x, 1)) == PLUS
5586 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5588 /* Thumb-2 only has autoincrement by constant. */
5589 rtx addend = XEXP (XEXP (x, 1), 1);
5590 HOST_WIDE_INT offset;
5592 if (GET_CODE (addend) != CONST_INT)
5595 offset = INTVAL (addend);
5596 if (GET_MODE_SIZE (mode) <= 4)
5597 return (offset > -256 && offset < 256);
5599 return (use_ldrd && offset > -1024 && offset < 1024
5600 && (offset & 3) == 0);
5603 /* After reload constants split into minipools will have addresses
5604 from a LABEL_REF. */
5605 else if (reload_completed
5606 && (code == LABEL_REF
5608 && GET_CODE (XEXP (x, 0)) == PLUS
5609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5610 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5616 else if (code == PLUS)
5618 rtx xop0 = XEXP (x, 0);
5619 rtx xop1 = XEXP (x, 1);
5621 return ((arm_address_register_rtx_p (xop0, strict_p)
5622 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5623 || (!strict_p && will_be_in_index_register (xop1))))
5624 || (arm_address_register_rtx_p (xop1, strict_p)
5625 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5628 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5629 && code == SYMBOL_REF
5630 && CONSTANT_POOL_ADDRESS_P (x)
5632 && symbol_mentioned_p (get_pool_constant (x))
5633 && ! pcrel_constant_p (get_pool_constant (x))))
5639 /* Return nonzero if INDEX is valid for an address index operand in
5642 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5645 HOST_WIDE_INT range;
5646 enum rtx_code code = GET_CODE (index);
5648 /* Standard coprocessor addressing modes. */
5649 if (TARGET_HARD_FLOAT
5650 && (TARGET_FPA || TARGET_MAVERICK)
5651 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5652 || (TARGET_MAVERICK && mode == DImode)))
5653 return (code == CONST_INT && INTVAL (index) < 1024
5654 && INTVAL (index) > -1024
5655 && (INTVAL (index) & 3) == 0);
5657 /* For quad modes, we restrict the constant offset to be slightly less
5658 than what the instruction format permits. We do this because for
5659 quad mode moves, we will actually decompose them into two separate
5660 double-mode reads or writes. INDEX must therefore be a valid
5661 (double-mode) offset and so should INDEX+8. */
5662 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5663 return (code == CONST_INT
5664 && INTVAL (index) < 1016
5665 && INTVAL (index) > -1024
5666 && (INTVAL (index) & 3) == 0);
5668 /* We have no such constraint on double mode offsets, so we permit the
5669 full range of the instruction format. */
5670 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5671 return (code == CONST_INT
5672 && INTVAL (index) < 1024
5673 && INTVAL (index) > -1024
5674 && (INTVAL (index) & 3) == 0);
5676 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1024
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 if (arm_address_register_rtx_p (index, strict_p)
5683 && (GET_MODE_SIZE (mode) <= 4))
5686 if (mode == DImode || mode == DFmode)
5688 if (code == CONST_INT)
5690 HOST_WIDE_INT val = INTVAL (index);
5693 return val > -256 && val < 256;
5695 return val > -4096 && val < 4092;
5698 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5701 if (GET_MODE_SIZE (mode) <= 4
5705 || (mode == QImode && outer == SIGN_EXTEND))))
5709 rtx xiop0 = XEXP (index, 0);
5710 rtx xiop1 = XEXP (index, 1);
5712 return ((arm_address_register_rtx_p (xiop0, strict_p)
5713 && power_of_two_operand (xiop1, SImode))
5714 || (arm_address_register_rtx_p (xiop1, strict_p)
5715 && power_of_two_operand (xiop0, SImode)));
5717 else if (code == LSHIFTRT || code == ASHIFTRT
5718 || code == ASHIFT || code == ROTATERT)
5720 rtx op = XEXP (index, 1);
5722 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5723 && GET_CODE (op) == CONST_INT
5725 && INTVAL (op) <= 31);
5729 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5735 || (outer == SIGN_EXTEND && mode == QImode))
5741 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5743 return (code == CONST_INT
5744 && INTVAL (index) < range
5745 && INTVAL (index) > -range);
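/* Examples of SImode addresses accepted above (illustrative
   assembly):

     ldr r0, [r1, r2]             register index
     ldr r0, [r1, r2, lsl #2]     power-of-two scaled index
     ldr r0, [r1, #4095]          12-bit positive immediate
     ldr r0, [r1, #-4095]         12-bit negative immediate

   whereas DImode and DFmode immediates are limited to the narrower
   ranges checked earlier for LDRD and its fallbacks. */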
5748 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5749 index operand. i.e. 1, 2, 4 or 8. */
5751 thumb2_index_mul_operand (rtx op)
5755 if (GET_CODE (op) != CONST_INT)
5759 return (val == 1 || val == 2 || val == 4 || val == 8);
5762 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5764 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5766 enum rtx_code code = GET_CODE (index);
5768 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5769 /* Standard coprocessor addressing modes. */
5770 if (TARGET_HARD_FLOAT
5771 && (TARGET_FPA || TARGET_MAVERICK)
5772 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5773 || (TARGET_MAVERICK && mode == DImode)))
5774 return (code == CONST_INT && INTVAL (index) < 1024
5775 && INTVAL (index) > -1024
5776 && (INTVAL (index) & 3) == 0);
5778 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5780 /* For DImode assume values will usually live in core regs
5781 and only allow LDRD addressing modes. */
5782 if (!TARGET_LDRD || mode != DImode)
5783 return (code == CONST_INT
5784 && INTVAL (index) < 1024
5785 && INTVAL (index) > -1024
5786 && (INTVAL (index) & 3) == 0);
5789 /* For quad modes, we restrict the constant offset to be slightly less
5790 than what the instruction format permits. We do this because for
5791 quad mode moves, we will actually decompose them into two separate
5792 double-mode reads or writes. INDEX must therefore be a valid
5793 (double-mode) offset and so should INDEX+8. */
5794 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5795 return (code == CONST_INT
5796 && INTVAL (index) < 1016
5797 && INTVAL (index) > -1024
5798 && (INTVAL (index) & 3) == 0);
5800 /* We have no such constraint on double mode offsets, so we permit the
5801 full range of the instruction format. */
5802 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5803 return (code == CONST_INT
5804 && INTVAL (index) < 1024
5805 && INTVAL (index) > -1024
5806 && (INTVAL (index) & 3) == 0);
5808 if (arm_address_register_rtx_p (index, strict_p)
5809 && (GET_MODE_SIZE (mode) <= 4))
5812 if (mode == DImode || mode == DFmode)
5814 if (code == CONST_INT)
5816 HOST_WIDE_INT val = INTVAL (index);
5817 /* ??? Can we assume ldrd for thumb2? */
5818 /* Thumb-2 ldrd only has reg+const addressing modes. */
5819 /* ldrd supports offsets of +-1020.
5820 However the ldr fallback does not. */
5821 return val > -256 && val < 256 && (val & 3) == 0;
5829 rtx xiop0 = XEXP (index, 0);
5830 rtx xiop1 = XEXP (index, 1);
5832 return ((arm_address_register_rtx_p (xiop0, strict_p)
5833 && thumb2_index_mul_operand (xiop1))
5834 || (arm_address_register_rtx_p (xiop1, strict_p)
5835 && thumb2_index_mul_operand (xiop0)));
5837 else if (code == ASHIFT)
5839 rtx op = XEXP (index, 1);
5841 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5842 && GET_CODE (op) == CONST_INT
5844 && INTVAL (op) <= 3);
5847 return (code == CONST_INT
5848 && INTVAL (index) < 4096
5849 && INTVAL (index) > -256);
5852 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5854 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5858 if (GET_CODE (x) != REG)
5864 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5866 return (regno <= LAST_LO_REGNUM
5867 || regno > LAST_VIRTUAL_REGISTER
5868 || regno == FRAME_POINTER_REGNUM
5869 || (GET_MODE_SIZE (mode) >= 4
5870 && (regno == STACK_POINTER_REGNUM
5871 || regno >= FIRST_PSEUDO_REGISTER
5872 || x == hard_frame_pointer_rtx
5873 || x == arg_pointer_rtx)));
5876 /* Return nonzero if x is a legitimate index register. This is the case
5877 for any base register that can access a QImode object. */
5879 thumb1_index_register_rtx_p (rtx x, int strict_p)
5881 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5884 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5886 The AP may be eliminated to either the SP or the FP, so we use the
5887 least common denominator, e.g. SImode, and offsets from 0 to 64.
5889 ??? Verify whether the above is the right approach.
5891 ??? Also, the FP may be eliminated to the SP, so perhaps that
5892 needs special handling also.
5894 ??? Look at how the mips16 port solves this problem. It probably uses
5895 better ways to solve some of these problems.
5897 Although it is not incorrect, we don't accept QImode and HImode
5898 addresses based on the frame pointer or arg pointer until the
5899 reload pass starts. This is so that eliminating such addresses
5900 into stack based ones won't produce impossible code. */
5902 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5904 /* ??? Not clear if this is right. Experiment. */
5905 if (GET_MODE_SIZE (mode) < 4
5906 && !(reload_in_progress || reload_completed)
5907 && (reg_mentioned_p (frame_pointer_rtx, x)
5908 || reg_mentioned_p (arg_pointer_rtx, x)
5909 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5910 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5911 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5912 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5915 /* Accept any base register. SP only in SImode or larger. */
5916 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5919 /* This is PC relative data before arm_reorg runs. */
5920 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5921 && GET_CODE (x) == SYMBOL_REF
5922 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5925 /* This is PC relative data after arm_reorg runs. */
5926 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5928 && (GET_CODE (x) == LABEL_REF
5929 || (GET_CODE (x) == CONST
5930 && GET_CODE (XEXP (x, 0)) == PLUS
5931 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5932 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5935 /* Post-inc indexing only supported for SImode and larger. */
5936 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5937 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5940 else if (GET_CODE (x) == PLUS)
5942 /* REG+REG address can be any two index registers. */
5943 /* We disallow FRAME+REG addressing since we know that FRAME
5944 will be replaced with STACK, and SP relative addressing only
5945 permits SP+OFFSET. */
5946 if (GET_MODE_SIZE (mode) <= 4
5947 && XEXP (x, 0) != frame_pointer_rtx
5948 && XEXP (x, 1) != frame_pointer_rtx
5949 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5950 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5951 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5954 /* REG+const has 5-7 bit offset for non-SP registers. */
5955 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5956 || XEXP (x, 0) == arg_pointer_rtx)
5957 && GET_CODE (XEXP (x, 1)) == CONST_INT
5958 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5961 /* REG+const has 10-bit offset for SP, but only SImode and
5962 larger are supported. */
5963 /* ??? Should probably check for DI/DFmode overflow here
5964 just like GO_IF_LEGITIMATE_OFFSET does. */
5965 else if (GET_CODE (XEXP (x, 0)) == REG
5966 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5967 && GET_MODE_SIZE (mode) >= 4
5968 && GET_CODE (XEXP (x, 1)) == CONST_INT
5969 && INTVAL (XEXP (x, 1)) >= 0
5970 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5971 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5974 else if (GET_CODE (XEXP (x, 0)) == REG
5975 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5976 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5977 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5978 && REGNO (XEXP (x, 0))
5979 <= LAST_VIRTUAL_POINTER_REGISTER))
5980 && GET_MODE_SIZE (mode) >= 4
5981 && GET_CODE (XEXP (x, 1)) == CONST_INT
5982 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5986 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5987 && GET_MODE_SIZE (mode) == 4
5988 && GET_CODE (x) == SYMBOL_REF
5989 && CONSTANT_POOL_ADDRESS_P (x)
5991 && symbol_mentioned_p (get_pool_constant (x))
5992 && ! pcrel_constant_p (get_pool_constant (x))))
5998 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5999 instruction of mode MODE. */
6001 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6003 switch (GET_MODE_SIZE (mode))
6006 return val >= 0 && val < 32;
6009 return val >= 0 && val < 64 && (val & 1) == 0;
6013 && (val + GET_MODE_SIZE (mode)) <= 128
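/* E.g. the byte forms accept #0..#31, the halfword forms #0..#62
   (even), and the word forms #0..#124 (multiples of 4, since
   val + 4 <= 128 must hold), matching the 5-bit scaled immediate
   fields of the 16-bit load/store encodings. */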
6019 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6022 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6023 else if (TARGET_THUMB2)
6024 return thumb2_legitimate_address_p (mode, x, strict_p);
6025 else /* if (TARGET_THUMB1) */
6026 return thumb1_legitimate_address_p (mode, x, strict_p);
6029 /* Build the SYMBOL_REF for __tls_get_addr. */
6031 static GTY(()) rtx tls_get_addr_libfunc;
6034 get_tls_get_addr (void)
6036 if (!tls_get_addr_libfunc)
6037 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6038 return tls_get_addr_libfunc;
6042 arm_load_tp (rtx target)
6045 target = gen_reg_rtx (SImode);
6049 /* Can return in any reg. */
6050 emit_insn (gen_load_tp_hard (target));
6054 /* Always returned in r0. Immediately copy the result into a pseudo,
6055 otherwise other uses of r0 (e.g. setting up function arguments) may
6056 clobber the value. */
6060 emit_insn (gen_load_tp_soft ());
6062 tmp = gen_rtx_REG (SImode, 0);
6063 emit_move_insn (target, tmp);
6069 load_tls_operand (rtx x, rtx reg)
6073 if (reg == NULL_RTX)
6074 reg = gen_reg_rtx (SImode);
6076 tmp = gen_rtx_CONST (SImode, x);
6078 emit_move_insn (reg, tmp);
6084 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6086 rtx insns, label, labelno, sum;
6090 labelno = GEN_INT (pic_labelno++);
6091 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6092 label = gen_rtx_CONST (VOIDmode, label);
6094 sum = gen_rtx_UNSPEC (Pmode,
6095 gen_rtvec (4, x, GEN_INT (reloc), label,
6096 GEN_INT (TARGET_ARM ? 8 : 4)),
6098 reg = load_tls_operand (sum, reg);
6101 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6102 else if (TARGET_THUMB2)
6103 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6104 else /* TARGET_THUMB1 */
6105 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6107 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6108 Pmode, 1, reg, Pmode);
6110 insns = get_insns ();
6117 legitimize_tls_address (rtx x, rtx reg)
6119 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6120 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6124 case TLS_MODEL_GLOBAL_DYNAMIC:
6125 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6126 dest = gen_reg_rtx (Pmode);
6127 emit_libcall_block (insns, dest, ret, x);
6130 case TLS_MODEL_LOCAL_DYNAMIC:
6131 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6133 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6134 share the LDM result with other LD model accesses. */
6135 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6137 dest = gen_reg_rtx (Pmode);
6138 emit_libcall_block (insns, dest, ret, eqv);
6140 /* Load the addend. */
6141 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6143 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6144 return gen_rtx_PLUS (Pmode, dest, addend);
6146 case TLS_MODEL_INITIAL_EXEC:
6147 labelno = GEN_INT (pic_labelno++);
6148 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6149 label = gen_rtx_CONST (VOIDmode, label);
6150 sum = gen_rtx_UNSPEC (Pmode,
6151 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6152 GEN_INT (TARGET_ARM ? 8 : 4)),
6154 reg = load_tls_operand (sum, reg);
6157 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6158 else if (TARGET_THUMB2)
6159 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6162 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6163 emit_move_insn (reg, gen_const_mem (SImode, reg));
6166 tp = arm_load_tp (NULL_RTX);
6168 return gen_rtx_PLUS (Pmode, tp, reg);
6170 case TLS_MODEL_LOCAL_EXEC:
6171 tp = arm_load_tp (NULL_RTX);
6173 reg = gen_rtx_UNSPEC (Pmode,
6174 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6176 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6178 return gen_rtx_PLUS (Pmode, tp, reg);
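/* Illustrative source-level effect (hypothetical variable): for

     static __thread int counter;

   in the main executable the local-exec case above applies and an
   access compiles to a load from tp plus a link-time TPOFF constant,
   with tp fetched by arm_load_tp; in a shared object the dynamic
   models above route the address through __tls_get_addr instead. */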
6185 /* Try machine-dependent ways of modifying an illegitimate address
6186 to be legitimate. If we find one, return the new, valid address. */
6188 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6192 /* TODO: legitimize_address for Thumb2. */
6195 return thumb_legitimize_address (x, orig_x, mode);
6198 if (arm_tls_symbol_p (x))
6199 return legitimize_tls_address (x, NULL_RTX);
6201 if (GET_CODE (x) == PLUS)
6203 rtx xop0 = XEXP (x, 0);
6204 rtx xop1 = XEXP (x, 1);
6206 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6207 xop0 = force_reg (SImode, xop0);
6209 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6210 xop1 = force_reg (SImode, xop1);
6212 if (ARM_BASE_REGISTER_RTX_P (xop0)
6213 && GET_CODE (xop1) == CONST_INT)
6215 HOST_WIDE_INT n, low_n;
6219 /* VFP addressing modes actually allow greater offsets, but for
6220 now we just stick with the lowest common denominator. */
6222 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6234 low_n = ((mode) == TImode ? 0
6235 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6239 base_reg = gen_reg_rtx (SImode);
6240 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6241 emit_move_insn (base_reg, val);
6242 x = plus_constant (base_reg, low_n);
6244 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6245 x = gen_rtx_PLUS (SImode, xop0, xop1);
6248 /* XXX We don't allow MINUS any more -- see comment in
6249 arm_legitimate_address_outer_p (). */
6250 else if (GET_CODE (x) == MINUS)
6252 rtx xop0 = XEXP (x, 0);
6253 rtx xop1 = XEXP (x, 1);
6255 if (CONSTANT_P (xop0))
6256 xop0 = force_reg (SImode, xop0);
6258 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6259 xop1 = force_reg (SImode, xop1);
6261 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6262 x = gen_rtx_MINUS (SImode, xop0, xop1);
6265 /* Make sure to take full advantage of the pre-indexed addressing mode
6266 with absolute addresses which often allows for the base register to
6267 be factorized for multiple adjacent memory references, and it might
6268 even allow for the minipool to be avoided entirely. */
6269 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6272 HOST_WIDE_INT mask, base, index;
6275 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6276 use an 8-bit index. So let's use a 12-bit index for SImode only and
6277 hope that arm_gen_constant will enable ldrb to use more bits. */
6278 bits = (mode == SImode) ? 12 : 8;
6279 mask = (1 << bits) - 1;
6280 base = INTVAL (x) & ~mask;
6281 index = INTVAL (x) & mask;
6282 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6284 /* It'll most probably be more efficient to generate the base
6285 with more bits set and use a negative index instead. */
6289 base_reg = force_reg (SImode, GEN_INT (base));
6290 x = plus_constant (base_reg, index);
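/* Worked example (SImode, hypothetical constant): for address
   0x12345678, mask is 0xfff, base becomes 0x12345000 and index 0x678;
   base has only 7 bits set, no more than (32 - 12)/2 = 10, so it is
   kept positive, materialized once, and the access becomes
   ldr rX, [rBASE, #0x678], letting neighbouring references share
   rBASE. */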
6295 /* We need to find and carefully transform any SYMBOL and LABEL
6296 references, so go back to the original address expression. */
6297 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6299 if (new_x != orig_x)
6307 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6308 to be legitimate. If we find one, return the new, valid address. */
6310 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6312 if (arm_tls_symbol_p (x))
6313 return legitimize_tls_address (x, NULL_RTX);
6315 if (GET_CODE (x) == PLUS
6316 && GET_CODE (XEXP (x, 1)) == CONST_INT
6317 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6318 || INTVAL (XEXP (x, 1)) < 0))
6320 rtx xop0 = XEXP (x, 0);
6321 rtx xop1 = XEXP (x, 1);
6322 HOST_WIDE_INT offset = INTVAL (xop1);
6324 /* Try to fold the offset into a biasing of the base register and
6325 then offsetting that. Don't do this when optimizing for space
6326 since it can cause too many CSEs. */
6327 if (optimize_size && offset >= 0
6328 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6330 HOST_WIDE_INT delta;
6333 delta = offset - (256 - GET_MODE_SIZE (mode));
6334 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6335 delta = 31 * GET_MODE_SIZE (mode);
6337 delta = offset & (~31 * GET_MODE_SIZE (mode));
6339 xop0 = force_operand (plus_constant (xop0, offset - delta),
6341 x = plus_constant (xop0, delta);
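/* For example, a SImode access at base + 300 biases the base by 252 and
   then loads with the in-range immediate offset 48 (word loads accept
   offsets 0..124 here). */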
6343 else if (offset < 0 && offset > -256)
6344 /* Small negative offsets are best done with a subtract before the
6345 dereference; forcing these into a register normally takes two instructions. */
6347 x = force_operand (x, NULL_RTX);
6350 /* For the remaining cases, force the constant into a register. */
6351 xop1 = force_reg (SImode, xop1);
6352 x = gen_rtx_PLUS (SImode, xop0, xop1);
6355 else if (GET_CODE (x) == PLUS
6356 && s_register_operand (XEXP (x, 1), SImode)
6357 && !s_register_operand (XEXP (x, 0), SImode))
6359 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6361 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6366 /* We need to find and carefully transform any SYMBOL and LABEL
6367 references, so go back to the original address expression. */
6368 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6370 if (new_x != orig_x)
6378 thumb_legitimize_reload_address (rtx *x_p,
6379 enum machine_mode mode,
6380 int opnum, int type,
6381 int ind_levels ATTRIBUTE_UNUSED)
6385 if (GET_CODE (x) == PLUS
6386 && GET_MODE_SIZE (mode) < 4
6387 && REG_P (XEXP (x, 0))
6388 && XEXP (x, 0) == stack_pointer_rtx
6389 && GET_CODE (XEXP (x, 1)) == CONST_INT
6390 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6395 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6396 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6400 /* If both registers are hi-regs, then it's better to reload the
6401 entire expression rather than each register individually. That
6402 only requires one reload register rather than two. */
6403 if (GET_CODE (x) == PLUS
6404 && REG_P (XEXP (x, 0))
6405 && REG_P (XEXP (x, 1))
6406 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6407 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6412 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6413 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6420 /* Test for various thread-local symbols. */
6422 /* Return TRUE if X is a thread-local symbol. */
6425 arm_tls_symbol_p (rtx x)
6427 if (! TARGET_HAVE_TLS)
6430 if (GET_CODE (x) != SYMBOL_REF)
6433 return SYMBOL_REF_TLS_MODEL (x) != 0;
6436 /* Helper for arm_tls_referenced_p. */
6439 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6441 if (GET_CODE (*x) == SYMBOL_REF)
6442 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6444 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6445 TLS offsets, not real symbol references. */
6446 if (GET_CODE (*x) == UNSPEC
6447 && XINT (*x, 1) == UNSPEC_TLS)
6453 /* Return TRUE if X contains any TLS symbol references. */
6456 arm_tls_referenced_p (rtx x)
6458 if (! TARGET_HAVE_TLS)
6461 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6464 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6467 arm_cannot_force_const_mem (rtx x)
6471 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6473 split_const (x, &base, &offset);
6474 if (GET_CODE (base) == SYMBOL_REF
6475 && !offset_within_block_p (base, INTVAL (offset)))
6478 return arm_tls_referenced_p (x);
6481 #define REG_OR_SUBREG_REG(X) \
6482 (GET_CODE (X) == REG \
6483 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6485 #define REG_OR_SUBREG_RTX(X) \
6486 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6489 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6491 enum machine_mode mode = GET_MODE (x);
6505 return COSTS_N_INSNS (1);
6508 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6511 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6518 return COSTS_N_INSNS (2) + cycles;
6520 return COSTS_N_INSNS (1) + 16;
6523 return (COSTS_N_INSNS (1)
6524 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6525 + (GET_CODE (SET_DEST (x)) == MEM)));
6530 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6532 if (thumb_shiftable_const (INTVAL (x)))
6533 return COSTS_N_INSNS (2);
6534 return COSTS_N_INSNS (3);
6536 else if ((outer == PLUS || outer == COMPARE)
6537 && INTVAL (x) < 256 && INTVAL (x) > -256)
6539 else if ((outer == IOR || outer == XOR || outer == AND)
6540 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6541 return COSTS_N_INSNS (1);
6542 else if (outer == AND)
6545 /* This duplicates the tests in the andsi3 expander. */
6546 for (i = 9; i <= 31; i++)
6547 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6548 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6549 return COSTS_N_INSNS (2);
6551 else if (outer == ASHIFT || outer == ASHIFTRT
6552 || outer == LSHIFTRT)
6554 return COSTS_N_INSNS (2);
6560 return COSTS_N_INSNS (3);
6578 /* XXX another guess. */
6579 /* Memory costs quite a lot for the first word, but subsequent words
6580 load at the equivalent of a single insn each. */
6581 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6582 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6587 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6593 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6594 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6600 return total + COSTS_N_INSNS (1);
6602 /* Assume a two-shift sequence. Increase the cost slightly so
6603 we prefer actual shifts over an extend operation. */
6604 return total + 1 + COSTS_N_INSNS (2);
6612 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6614 enum machine_mode mode = GET_MODE (x);
6615 enum rtx_code subcode;
6617 enum rtx_code code = GET_CODE (x);
6623 /* Memory costs quite a lot for the first word, but subsequent words
6624 load at the equivalent of a single insn each. */
6625 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6632 if (TARGET_HARD_FLOAT && mode == SFmode)
6633 *total = COSTS_N_INSNS (2);
6634 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6635 *total = COSTS_N_INSNS (4);
6637 *total = COSTS_N_INSNS (20);
6641 if (GET_CODE (XEXP (x, 1)) == REG)
6642 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6643 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6644 *total = rtx_cost (XEXP (x, 1), code, speed);
6650 *total += COSTS_N_INSNS (4);
6655 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6656 *total += rtx_cost (XEXP (x, 0), code, speed);
6659 *total += COSTS_N_INSNS (3);
6663 *total += COSTS_N_INSNS (1);
6664 /* Increase the cost of complex shifts because they aren't any faster,
6665 and they reduce dual-issue opportunities. */
6666 if (arm_tune_cortex_a9
6667 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6675 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6676 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6677 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6679 *total += rtx_cost (XEXP (x, 1), code, speed);
6683 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6684 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6686 *total += rtx_cost (XEXP (x, 0), code, speed);
6693 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6695 if (TARGET_HARD_FLOAT
6697 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6699 *total = COSTS_N_INSNS (1);
6700 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6701 && arm_const_double_rtx (XEXP (x, 0)))
6703 *total += rtx_cost (XEXP (x, 1), code, speed);
6707 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6708 && arm_const_double_rtx (XEXP (x, 1)))
6710 *total += rtx_cost (XEXP (x, 0), code, speed);
6716 *total = COSTS_N_INSNS (20);
6720 *total = COSTS_N_INSNS (1);
6721 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6722 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6724 *total += rtx_cost (XEXP (x, 1), code, speed);
6728 subcode = GET_CODE (XEXP (x, 1));
6729 if (subcode == ASHIFT || subcode == ASHIFTRT
6730 || subcode == LSHIFTRT
6731 || subcode == ROTATE || subcode == ROTATERT)
6733 *total += rtx_cost (XEXP (x, 0), code, speed);
6734 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6738 /* A shift as a part of RSB costs no more than RSB itself. */
6739 if (GET_CODE (XEXP (x, 0)) == MULT
6740 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6742 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6743 *total += rtx_cost (XEXP (x, 1), code, speed);
6748 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6750 *total += rtx_cost (XEXP (x, 0), code, speed);
6751 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6755 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6756 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6758 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6759 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6760 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6761 *total += COSTS_N_INSNS (1);
6769 if (code == PLUS && arm_arch6 && mode == SImode
6770 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6771 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6773 *total = COSTS_N_INSNS (1);
6774 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6776 *total += rtx_cost (XEXP (x, 1), code, speed);
6780 /* MLA: All arguments must be registers. We filter out
6781 multiplication by a power of two, so that we fall down into the code below. */
6783 if (GET_CODE (XEXP (x, 0)) == MULT
6784 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6786 /* The cost comes from the cost of the multiply. */
6790 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6792 if (TARGET_HARD_FLOAT
6794 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6796 *total = COSTS_N_INSNS (1);
6797 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6798 && arm_const_double_rtx (XEXP (x, 1)))
6800 *total += rtx_cost (XEXP (x, 0), code, speed);
6807 *total = COSTS_N_INSNS (20);
6811 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6812 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6814 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6815 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6816 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6817 *total += COSTS_N_INSNS (1);
6823 case AND: case XOR: case IOR:
6825 /* Normally the frame registers will be split into reg+const during
6826 reload, so it is a bad idea to combine them with other instructions,
6827 since then they might not be moved outside of loops. As a compromise
6828 we allow integration with ops that have a constant as their second operand. */
6830 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6831 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6832 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6833 *total = COSTS_N_INSNS (1);
6837 *total += COSTS_N_INSNS (2);
6838 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6839 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6841 *total += rtx_cost (XEXP (x, 0), code, speed);
6848 *total += COSTS_N_INSNS (1);
6849 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6850 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6852 *total += rtx_cost (XEXP (x, 0), code, speed);
6855 subcode = GET_CODE (XEXP (x, 0));
6856 if (subcode == ASHIFT || subcode == ASHIFTRT
6857 || subcode == LSHIFTRT
6858 || subcode == ROTATE || subcode == ROTATERT)
6860 *total += rtx_cost (XEXP (x, 1), code, speed);
6861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6866 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6868 *total += rtx_cost (XEXP (x, 1), code, speed);
6869 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6873 if (subcode == UMIN || subcode == UMAX
6874 || subcode == SMIN || subcode == SMAX)
6876 *total = COSTS_N_INSNS (3);
6883 /* This should have been handled by the CPU specific routines. */
6887 if (arm_arch3m && mode == SImode
6888 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6889 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6890 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6891 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6892 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6893 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6895 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6898 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6902 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6904 if (TARGET_HARD_FLOAT
6906 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6908 *total = COSTS_N_INSNS (1);
6911 *total = COSTS_N_INSNS (2);
6917 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6918 if (mode == SImode && code == NOT)
6920 subcode = GET_CODE (XEXP (x, 0));
6921 if (subcode == ASHIFT || subcode == ASHIFTRT
6922 || subcode == LSHIFTRT
6923 || subcode == ROTATE || subcode == ROTATERT
6925 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6927 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6928 /* Register shifts cost an extra cycle. */
6929 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6930 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6939 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6941 *total = COSTS_N_INSNS (4);
6945 operand = XEXP (x, 0);
6947 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6948 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6949 && GET_CODE (XEXP (operand, 0)) == REG
6950 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6951 *total += COSTS_N_INSNS (1);
6952 *total += (rtx_cost (XEXP (x, 1), code, speed)
6953 + rtx_cost (XEXP (x, 2), code, speed));
6957 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6959 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6965 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6966 && mode == SImode && XEXP (x, 1) == const0_rtx)
6968 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6974 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6975 && mode == SImode && XEXP (x, 1) == const0_rtx)
6977 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6997 /* SCC insns. If the comparison has already been performed, they
6998 cost 2 instructions. Otherwise they need an additional comparison
6999 before them. */
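/* E.g. materializing (eq) with the flags already set is the two-insn
   sequence "mov rD, #0; moveq rD, #1". */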
7000 *total = COSTS_N_INSNS (2);
7001 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7008 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7014 *total += COSTS_N_INSNS (1);
7015 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7016 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7018 *total += rtx_cost (XEXP (x, 0), code, speed);
7022 subcode = GET_CODE (XEXP (x, 0));
7023 if (subcode == ASHIFT || subcode == ASHIFTRT
7024 || subcode == LSHIFTRT
7025 || subcode == ROTATE || subcode == ROTATERT)
7027 *total += rtx_cost (XEXP (x, 1), code, speed);
7028 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7033 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7035 *total += rtx_cost (XEXP (x, 1), code, speed);
7036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7046 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7047 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7048 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7049 *total += rtx_cost (XEXP (x, 1), code, speed);
7053 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7055 if (TARGET_HARD_FLOAT
7057 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7059 *total = COSTS_N_INSNS (1);
7062 *total = COSTS_N_INSNS (20);
7065 *total = COSTS_N_INSNS (1);
7067 *total += COSTS_N_INSNS (3);
7073 if (GET_MODE_CLASS (mode) == MODE_INT)
7075 rtx op = XEXP (x, 0);
7076 enum machine_mode opmode = GET_MODE (op);
7079 *total += COSTS_N_INSNS (1);
7081 if (opmode != SImode)
7085 /* If !arm_arch4, we use one of the extendhisi2_mem
7086 or movhi_bytes patterns for HImode. For a QImode
7087 sign extension, we first zero-extend from memory
7088 and then perform a shift sequence. */
7089 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7090 *total += COSTS_N_INSNS (2);
7093 *total += COSTS_N_INSNS (1);
7095 /* We don't have the necessary insn, so we need to perform some other operation. */
7097 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7098 /* An and with constant 255. */
7099 *total += COSTS_N_INSNS (1);
7101 /* A shift sequence. Increase costs slightly to avoid
7102 combining two shifts into an extend operation. */
7103 *total += COSTS_N_INSNS (2) + 1;
7109 switch (GET_MODE (XEXP (x, 0)))
7116 *total = COSTS_N_INSNS (1);
7126 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7130 if (const_ok_for_arm (INTVAL (x))
7131 || const_ok_for_arm (~INTVAL (x)))
7132 *total = COSTS_N_INSNS (1);
7134 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7135 INTVAL (x), NULL_RTX,
7142 *total = COSTS_N_INSNS (3);
7146 *total = COSTS_N_INSNS (1);
7150 *total = COSTS_N_INSNS (1);
7151 *total += rtx_cost (XEXP (x, 0), code, speed);
7155 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7156 && (mode == SFmode || !TARGET_VFP_SINGLE))
7157 *total = COSTS_N_INSNS (1);
7159 *total = COSTS_N_INSNS (4);
7163 *total = COSTS_N_INSNS (4);
7168 /* Estimate the size cost of Thumb-1 instructions.
7169 For now most of the code is copied from thumb1_rtx_costs; we need
7170 finer-grained tuning when we have more related test cases. */
7172 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7174 enum machine_mode mode = GET_MODE (x);
7187 return COSTS_N_INSNS (1);
7190 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7192 /* The Thumb-1 mul instruction cannot operate on a constant; we must
7193 load it into a register first. */
7194 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7195 return COSTS_N_INSNS (1) + const_size;
7197 return COSTS_N_INSNS (1);
7200 return (COSTS_N_INSNS (1)
7201 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7202 + (GET_CODE (SET_DEST (x)) == MEM)));
7207 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7208 return COSTS_N_INSNS (1);
7209 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7210 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7211 return COSTS_N_INSNS (2);
7212 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7213 if (thumb_shiftable_const (INTVAL (x)))
7214 return COSTS_N_INSNS (2);
7215 return COSTS_N_INSNS (3);
7217 else if ((outer == PLUS || outer == COMPARE)
7218 && INTVAL (x) < 256 && INTVAL (x) > -256)
7220 else if ((outer == IOR || outer == XOR || outer == AND)
7221 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7222 return COSTS_N_INSNS (1);
7223 else if (outer == AND)
7226 /* This duplicates the tests in the andsi3 expander. */
7227 for (i = 9; i <= 31; i++)
7228 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7229 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7230 return COSTS_N_INSNS (2);
7232 else if (outer == ASHIFT || outer == ASHIFTRT
7233 || outer == LSHIFTRT)
7235 return COSTS_N_INSNS (2);
7241 return COSTS_N_INSNS (3);
7259 /* XXX another guess. */
7260 /* Memory costs quite a lot for the first word, but subsequent words
7261 load at the equivalent of a single insn each. */
7262 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7263 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7268 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7273 /* XXX still guessing. */
7274 switch (GET_MODE (XEXP (x, 0)))
7277 return (1 + (mode == DImode ? 4 : 0)
7278 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7281 return (4 + (mode == DImode ? 4 : 0)
7282 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7285 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7296 /* RTX costs when optimizing for size. */
7298 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7301 enum machine_mode mode = GET_MODE (x);
7304 *total = thumb1_size_rtx_costs (x, code, outer_code);
7308 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7312 /* A memory access costs 1 insn if the mode is small, or the address is
7313 a single register, otherwise it costs one insn per word. */
7314 if (REG_P (XEXP (x, 0)))
7315 *total = COSTS_N_INSNS (1);
7317 && GET_CODE (XEXP (x, 0)) == PLUS
7318 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7319 /* This will be split into two instructions.
7320 See arm.md:calculate_pic_address. */
7321 *total = COSTS_N_INSNS (2);
7323 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7330 /* Needs a libcall, so it costs about this. */
7331 *total = COSTS_N_INSNS (2);
7335 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7337 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7345 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7347 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7350 else if (mode == SImode)
7352 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7353 /* Slightly disparage register shifts, but not by much. */
7354 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7355 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7359 /* Needs a libcall. */
7360 *total = COSTS_N_INSNS (2);
7364 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7365 && (mode == SFmode || !TARGET_VFP_SINGLE))
7367 *total = COSTS_N_INSNS (1);
7373 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7374 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7376 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7377 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7378 || subcode1 == ROTATE || subcode1 == ROTATERT
7379 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7380 || subcode1 == ASHIFTRT)
7382 /* It's just the cost of the two operands. */
7387 *total = COSTS_N_INSNS (1);
7391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7395 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7396 && (mode == SFmode || !TARGET_VFP_SINGLE))
7398 *total = COSTS_N_INSNS (1);
7402 /* A shift as a part of ADD costs nothing. */
7403 if (GET_CODE (XEXP (x, 0)) == MULT
7404 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7406 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7407 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7408 *total += rtx_cost (XEXP (x, 1), code, false);
7413 case AND: case XOR: case IOR:
7416 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7418 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7419 || subcode == LSHIFTRT || subcode == ASHIFTRT
7420 || (code == AND && subcode == NOT))
7422 /* It's just the cost of the two operands. */
7428 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7432 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7436 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7437 && (mode == SFmode || !TARGET_VFP_SINGLE))
7439 *total = COSTS_N_INSNS (1);
7445 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7454 if (cc_register (XEXP (x, 0), VOIDmode))
7457 *total = COSTS_N_INSNS (1);
7461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7462 && (mode == SFmode || !TARGET_VFP_SINGLE))
7463 *total = COSTS_N_INSNS (1);
7465 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7470 return arm_rtx_costs_1 (x, outer_code, total, 0);
7473 if (const_ok_for_arm (INTVAL (x)))
7474 /* A multiplication by a constant requires another instruction
7475 to load the constant to a register. */
7476 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7478 else if (const_ok_for_arm (~INTVAL (x)))
7479 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7480 else if (const_ok_for_arm (-INTVAL (x)))
7482 if (outer_code == COMPARE || outer_code == PLUS
7483 || outer_code == MINUS)
7486 *total = COSTS_N_INSNS (1);
7489 *total = COSTS_N_INSNS (2);
7495 *total = COSTS_N_INSNS (2);
7499 *total = COSTS_N_INSNS (4);
7504 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7505 cost of these slightly. */
7506 *total = COSTS_N_INSNS (1) + 1;
7510 if (mode != VOIDmode)
7511 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7513 *total = COSTS_N_INSNS (4); /* Who knows? */
7518 /* RTX costs. Dispatch to the size costs when optimizing for size,
otherwise to the per-core tuning costs. */
7520 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7524 return arm_size_rtx_costs (x, (enum rtx_code) code,
7525 (enum rtx_code) outer_code, total);
7527 return current_tune->rtx_costs (x, (enum rtx_code) code,
7528 (enum rtx_code) outer_code,
7532 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7533 supported on any "slowmul" cores, so it can be ignored. */
7536 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7537 int *total, bool speed)
7539 enum machine_mode mode = GET_MODE (x);
7543 *total = thumb1_rtx_costs (x, code, outer_code);
7550 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7553 *total = COSTS_N_INSNS (20);
7557 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7559 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7560 & (unsigned HOST_WIDE_INT) 0xffffffff);
7561 int cost, const_ok = const_ok_for_arm (i);
7562 int j, booth_unit_size;
7564 /* Tune as appropriate. */
7565 cost = const_ok ? 4 : 8;
7566 booth_unit_size = 2;
7567 for (j = 0; i && j < 32; j += booth_unit_size)
7569 i >>= booth_unit_size;
7573 *total = COSTS_N_INSNS (cost);
7574 *total += rtx_cost (XEXP (x, 0), code, speed);
7578 *total = COSTS_N_INSNS (20);
7582 return arm_rtx_costs_1 (x, outer_code, total, speed);
7587 /* RTX cost for cores with a fast multiply unit (M variants). */
7590 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7591 int *total, bool speed)
7593 enum machine_mode mode = GET_MODE (x);
7597 *total = thumb1_rtx_costs (x, code, outer_code);
7601 /* ??? Should Thumb-2 use different costs? */
7605 /* There is no point basing this on the tuning, since it is always the
7606 fast variant if it exists at all. */
7608 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7609 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7610 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7612 *total = COSTS_N_INSNS (2);
7619 *total = COSTS_N_INSNS (5);
7623 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7625 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7626 & (unsigned HOST_WIDE_INT) 0xffffffff);
7627 int cost, const_ok = const_ok_for_arm (i);
7628 int j, booth_unit_size;
7630 /* Tune as appropriate. */
7631 cost = const_ok ? 4 : 8;
7632 booth_unit_size = 8;
7633 for (j = 0; i && j < 32; j += booth_unit_size)
7635 i >>= booth_unit_size;
7639 *total = COSTS_N_INSNS (cost);
7645 *total = COSTS_N_INSNS (4);
7649 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7651 if (TARGET_HARD_FLOAT
7653 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7655 *total = COSTS_N_INSNS (1);
7660 /* Requires a libcall. */
7661 *total = COSTS_N_INSNS (20);
7665 return arm_rtx_costs_1 (x, outer_code, total, speed);
7670 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7671 so it can be ignored. */
7674 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7675 int *total, bool speed)
7677 enum machine_mode mode = GET_MODE (x);
7681 *total = thumb1_rtx_costs (x, code, outer_code);
7688 if (GET_CODE (XEXP (x, 0)) != MULT)
7689 return arm_rtx_costs_1 (x, outer_code, total, speed);
7691 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7692 will stall until the multiplication is complete. */
7693 *total = COSTS_N_INSNS (3);
7697 /* There is no point basing this on the tuning, since it is always the
7698 fast variant if it exists at all. */
7700 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7701 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7702 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7704 *total = COSTS_N_INSNS (2);
7711 *total = COSTS_N_INSNS (5);
7715 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7717 /* If operand 1 is a constant we can more accurately
7718 calculate the cost of the multiply. The multiplier can
7719 retire 15 bits on the first cycle and a further 12 on the
7720 second. We do, of course, have to load the constant into
7721 a register first. */
7722 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7723 /* There's a general overhead of one cycle. */
7725 unsigned HOST_WIDE_INT masked_const;
7730 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7732 masked_const = i & 0xffff8000;
7733 if (masked_const != 0)
7736 masked_const = i & 0xf8000000;
7737 if (masked_const != 0)
7740 *total = COSTS_N_INSNS (cost);
7746 *total = COSTS_N_INSNS (3);
7750 /* Requires a libcall. */
7751 *total = COSTS_N_INSNS (20);
7755 return arm_rtx_costs_1 (x, outer_code, total, speed);
7760 /* RTX costs for 9e (and later) cores. */
7763 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7764 int *total, bool speed)
7766 enum machine_mode mode = GET_MODE (x);
7773 *total = COSTS_N_INSNS (3);
7777 *total = thumb1_rtx_costs (x, code, outer_code);
7785 /* There is no point basing this on the tuning, since it is always the
7786 fast variant if it exists at all. */
7788 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7789 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7790 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7792 *total = COSTS_N_INSNS (2);
7799 *total = COSTS_N_INSNS (5);
7805 *total = COSTS_N_INSNS (2);
7809 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7811 if (TARGET_HARD_FLOAT
7813 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7815 *total = COSTS_N_INSNS (1);
7820 *total = COSTS_N_INSNS (20);
7824 return arm_rtx_costs_1 (x, outer_code, total, speed);
7827 /* All address computations that can be done are free, but rtx cost returns
7828 the same for practically all of them. So we weight the different types
7829 of address here in the order (most pref first):
7830 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7832 arm_arm_address_cost (rtx x)
7834 enum rtx_code c = GET_CODE (x);
7836 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7838 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7843 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7846 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7856 arm_thumb_address_cost (rtx x)
7858 enum rtx_code c = GET_CODE (x);
7863 && GET_CODE (XEXP (x, 0)) == REG
7864 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7871 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7873 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7876 /* Adjust cost hook for XScale. */
7878 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7880 /* Some true dependencies can have a higher cost depending
7881 on precisely how certain input operands are used. */
7882 if (REG_NOTE_KIND (link) == 0
7883 && recog_memoized (insn) >= 0
7884 && recog_memoized (dep) >= 0)
7886 int shift_opnum = get_attr_shift (insn);
7887 enum attr_type attr_type = get_attr_type (dep);
7889 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7890 operand for INSN. If we have a shifted input operand and the
7891 instruction we depend on is another ALU instruction, then we may
7892 have to account for an additional stall. */
7893 if (shift_opnum != 0
7894 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7896 rtx shifted_operand;
7899 /* Get the shifted operand. */
7900 extract_insn (insn);
7901 shifted_operand = recog_data.operand[shift_opnum];
7903 /* Iterate over all the operands in DEP. If we write an operand
7904 that overlaps with SHIFTED_OPERAND, then we have to increase the
7905 cost of this dependency. */
7907 preprocess_constraints ();
7908 for (opno = 0; opno < recog_data.n_operands; opno++)
7910 /* We can ignore strict inputs. */
7911 if (recog_data.operand_type[opno] == OP_IN)
7914 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7926 /* Adjust cost hook for Cortex A9. */
7928 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7930 switch (REG_NOTE_KIND (link))
7937 case REG_DEP_OUTPUT:
7938 if (recog_memoized (insn) >= 0
7939 && recog_memoized (dep) >= 0)
7941 if (GET_CODE (PATTERN (insn)) == SET)
7944 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7946 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7948 enum attr_type attr_type_insn = get_attr_type (insn);
7949 enum attr_type attr_type_dep = get_attr_type (dep);
7951 /* By default all dependencies of the form
s0 = s0 <op> s1
s0 = s0 <op> s2
7954 have an extra latency of 1 cycle because
7955 of the input and output dependency in this
7956 case. However this gets modeled as a true
7957 dependency and hence all these checks. */
7958 if (REG_P (SET_DEST (PATTERN (insn)))
7959 && REG_P (SET_DEST (PATTERN (dep)))
7960 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7961 SET_DEST (PATTERN (dep))))
7963 /* FMACS is a special case where the dependent
7964 instruction can be issued 3 cycles before
7965 the normal latency in case of an output dependency. */
7967 if ((attr_type_insn == TYPE_FMACS
7968 || attr_type_insn == TYPE_FMACD)
7969 && (attr_type_dep == TYPE_FMACS
7970 || attr_type_dep == TYPE_FMACD))
7972 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7973 *cost = insn_default_latency (dep) - 3;
7975 *cost = insn_default_latency (dep);
7980 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7981 *cost = insn_default_latency (dep) + 1;
7983 *cost = insn_default_latency (dep);
7999 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8000 It corrects the value of COST based on the relationship between
8001 INSN and DEP through the dependence LINK. It returns the new
8002 value. There is a per-core adjust_cost hook to adjust scheduler costs
8003 and the per-core hook can choose to completely override the generic
8004 adjust_cost function. Only put bits of code into arm_adjust_cost that
8005 are common across all cores. */
8007 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8011 /* When generating Thumb-1 code, we want to place flag-setting operations
8012 close to a conditional branch which depends on them, so that we can
8013 omit the comparison. */
8015 && REG_NOTE_KIND (link) == 0
8016 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8017 && recog_memoized (dep) >= 0
8018 && get_attr_conds (dep) == CONDS_SET)
8021 if (current_tune->sched_adjust_cost != NULL)
8023 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8027 /* XXX This is not strictly true for the FPA. */
8028 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8029 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8032 /* Call insns don't incur a stall, even if they follow a load. */
8033 if (REG_NOTE_KIND (link) == 0
8034 && GET_CODE (insn) == CALL_INSN)
8037 if ((i_pat = single_set (insn)) != NULL
8038 && GET_CODE (SET_SRC (i_pat)) == MEM
8039 && (d_pat = single_set (dep)) != NULL
8040 && GET_CODE (SET_DEST (d_pat)) == MEM)
8042 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8043 /* This is a load after a store; there is no conflict if the load reads
8044 from a cached area. Assume that loads from the stack, and from the
8045 constant pool are cached, and that others will miss. This is a rough approximation. */
8048 if ((GET_CODE (src_mem) == SYMBOL_REF
8049 && CONSTANT_POOL_ADDRESS_P (src_mem))
8050 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8051 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8052 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8059 static int fp_consts_inited = 0;
8061 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8062 static const char * const strings_fp[8] =
{
"0", "1", "2", "3",
8065 "4", "5", "0.5", "10"
};
8068 static REAL_VALUE_TYPE values_fp[8];
8071 init_fp_table (void)
8077 fp_consts_inited = 1;
8079 fp_consts_inited = 8;
8081 for (i = 0; i < fp_consts_inited; i++)
8083 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8088 /* Return TRUE if rtx X is a valid immediate FP constant. */
8090 arm_const_double_rtx (rtx x)
8095 if (!fp_consts_inited)
8098 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8099 if (REAL_VALUE_MINUS_ZERO (r))
8102 for (i = 0; i < fp_consts_inited; i++)
8103 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8109 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8111 neg_const_double_rtx_ok_for_fpa (rtx x)
8116 if (!fp_consts_inited)
8119 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8120 r = real_value_negate (&r);
8121 if (REAL_VALUE_MINUS_ZERO (r))
8124 for (i = 0; i < 8; i++)
8125 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8132 /* VFPv3 has a fairly wide range of representable immediates, formed from
8133 "quarter-precision" floating-point values. These can be evaluated using this
8134 formula (with ^ for exponentiation):

-1^s * n * 2^-r

8138 Where 's' is a sign bit (0/1), and 'n' and 'r' are integers such that
8139 16 <= n <= 31 and 0 <= r <= 7.
8141 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8143 - A (most-significant) is the sign bit.
8144 - BCD are the exponent (encoded as r XOR 3).
8145 - EFGH are the mantissa (encoded as n - 16).
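For example, 1.0 == -1^0 * 16 * 2^-4 (s == 0, n == 16, r == 4), and so
maps onto (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70. */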
8148 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8149 fconst[sd] instruction, or -1 if X isn't suitable. */
8151 vfp3_const_double_index (rtx x)
8153 REAL_VALUE_TYPE r, m;
8155 unsigned HOST_WIDE_INT mantissa, mant_hi;
8156 unsigned HOST_WIDE_INT mask;
8157 HOST_WIDE_INT m1, m2;
8158 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8160 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8163 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8165 /* We can't represent these things, so detect them first. */
8166 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8169 /* Extract sign, exponent and mantissa. */
8170 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8171 r = real_value_abs (&r);
8172 exponent = REAL_EXP (&r);
8173 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8174 highest (sign) bit, with a fixed binary point at bit point_pos.
8175 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8176 bits for the mantissa, this may fail (low bits would be lost). */
8177 real_ldexp (&m, &r, point_pos - exponent);
8178 REAL_VALUE_TO_INT (&m1, &m2, m);
8182 /* If there are bits set in the low part of the mantissa, we can't
8183 represent this value. */
8187 /* Now make it so that mantissa contains the most-significant bits, and move
8188 the point_pos to indicate that the least-significant bits have been discarded. */
8190 point_pos -= HOST_BITS_PER_WIDE_INT;
8193 /* We can permit four significant bits of mantissa only, plus a high bit
8194 which is always 1. */
8195 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8196 if ((mantissa & mask) != 0)
8199 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8200 mantissa >>= point_pos - 5;
8202 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8203 floating-point immediate zero with Neon using an integer-zero load, but
8204 that case is handled elsewhere.) */
8208 gcc_assert (mantissa >= 16 && mantissa <= 31);
8210 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8211 normalized significands are in the range [1, 2). (Our mantissa is shifted
8212 left 4 places at this point relative to normalized IEEE754 values). GCC
8213 internally uses [0.5, 1) (see real.c), so the exponent returned from
8214 REAL_EXP must be altered. */
8215 exponent = 5 - exponent;
8217 if (exponent < 0 || exponent > 7)
8220 /* Sign, mantissa and exponent are now in the correct form to plug into the
8221 formula described in the comment above. */
8222 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8225 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8227 vfp3_const_double_rtx (rtx x)
8232 return vfp3_const_double_index (x) != -1;
8235 /* Recognize immediates which can be used in various Neon instructions. Legal
8236 immediates are described by the following table (for VMVN variants, the
8237 bitwise inverse of the constant shown is recognized. In either case, VMOV
8238 is output and the correct instruction to use for a given constant is chosen
8239 by the assembler). The constant shown is replicated across all elements of
8240 the destination vector.
8242 insn elems variant constant (binary)
8243 ---- ----- ------- -----------------
8244 vmov i32 0 00000000 00000000 00000000 abcdefgh
8245 vmov i32 1 00000000 00000000 abcdefgh 00000000
8246 vmov i32 2 00000000 abcdefgh 00000000 00000000
8247 vmov i32 3 abcdefgh 00000000 00000000 00000000
8248 vmov i16 4 00000000 abcdefgh
8249 vmov i16 5 abcdefgh 00000000
8250 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8251 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8252 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8253 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8254 vmvn i16 10 00000000 abcdefgh
8255 vmvn i16 11 abcdefgh 00000000
8256 vmov i32 12 00000000 00000000 abcdefgh 11111111
8257 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8258 vmov i32 14 00000000 abcdefgh 11111111 11111111
8259 vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
8261 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8262 eeeeeeee ffffffff gggggggg hhhhhhhh
8263 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8265 For case 18, B = !b. Representable values are exactly those accepted by
8266 vfp3_const_double_index, but are output as floating-point numbers rather
8269 Variants 0-5 (inclusive) may also be used as immediates for the second
8270 operand of VORR/VBIC instructions.
8272 The INVERSE argument causes the bitwise inverse of the given operand to be
8273 recognized instead (used for recognizing legal immediates for the VAND/VORN
8274 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8275 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8276 output, rather than the real insns vbic/vorr).
8278 INVERSE makes no difference to the recognition of float vectors.
8280 The return value is the variant of immediate as shown in the above table, or
8281 -1 if the given value doesn't match any of the listed patterns.
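For example, a V4SI constant with all elements equal to 0x0000ab00 splats
to the per-element byte pattern 00 ab 00 00 (least-significant byte first)
and is recognized as variant 1 with an element width of 32. */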
8284 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8285 rtx *modconst, int *elementwidth)
8287 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8289 for (i = 0; i < idx; i += (STRIDE)) \
8294 immtype = (CLASS); \
8295 elsize = (ELSIZE); \
8299 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8300 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8301 unsigned char bytes[16];
8302 int immtype = -1, matches;
8303 unsigned int invmask = inverse ? 0xff : 0;
8305 /* Vectors of float constants. */
8306 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8308 rtx el0 = CONST_VECTOR_ELT (op, 0);
8311 if (!vfp3_const_double_rtx (el0))
8314 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8316 for (i = 1; i < n_elts; i++)
8318 rtx elt = CONST_VECTOR_ELT (op, i);
8321 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8323 if (!REAL_VALUES_EQUAL (r0, re))
8328 *modconst = CONST_VECTOR_ELT (op, 0);
8336 /* Splat vector constant out into a byte vector. */
8337 for (i = 0; i < n_elts; i++)
8339 rtx el = CONST_VECTOR_ELT (op, i);
8340 unsigned HOST_WIDE_INT elpart;
8341 unsigned int part, parts;
8343 if (GET_CODE (el) == CONST_INT)
8345 elpart = INTVAL (el);
8348 else if (GET_CODE (el) == CONST_DOUBLE)
8350 elpart = CONST_DOUBLE_LOW (el);
8356 for (part = 0; part < parts; part++)
8359 for (byte = 0; byte < innersize; byte++)
8361 bytes[idx++] = (elpart & 0xff) ^ invmask;
8362 elpart >>= BITS_PER_UNIT;
8364 if (GET_CODE (el) == CONST_DOUBLE)
8365 elpart = CONST_DOUBLE_HIGH (el);
8370 gcc_assert (idx == GET_MODE_SIZE (mode));
8374 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8375 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8377 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8378 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8380 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8381 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8383 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8384 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8386 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8388 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8390 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8391 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8393 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8394 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8396 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8397 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8399 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8400 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8402 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8404 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8406 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8407 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8409 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8410 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8412 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8413 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8415 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8416 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8418 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8420 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8421 && bytes[i] == bytes[(i + 8) % idx]);
8429 *elementwidth = elsize;
8433 unsigned HOST_WIDE_INT imm = 0;
8435 /* Un-invert bytes of recognized vector, if necessary. */
8437 for (i = 0; i < idx; i++)
8438 bytes[i] ^= invmask;
8442 /* FIXME: Broken on 32-bit H_W_I hosts. */
8443 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8445 for (i = 0; i < 8; i++)
8446 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8447 << (i * BITS_PER_UNIT);
8449 *modconst = GEN_INT (imm);
8453 unsigned HOST_WIDE_INT imm = 0;
8455 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8456 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8458 *modconst = GEN_INT (imm);
8466 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8467 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8468 float elements), and a modified constant (whatever should be output for a
8469 VMOV) in *MODCONST. */
8472 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8473 rtx *modconst, int *elementwidth)
8477 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8483 *modconst = tmpconst;
8486 *elementwidth = tmpwidth;
8491 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8492 the immediate is valid, write a constant suitable for using as an operand
8493 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8494 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8497 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8498 rtx *modconst, int *elementwidth)
8502 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8504 if (retval < 0 || retval > 5)
8508 *modconst = tmpconst;
8511 *elementwidth = tmpwidth;
8516 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
8520 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8521 int inverse, int quad)
8523 int width, is_valid;
8524 static char templ[40];
8526 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8528 gcc_assert (is_valid != 0);
8531 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8533 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8538 /* Output a sequence of pairwise operations to implement a reduction.
8539 NOTE: We do "too much work" here, because pairwise operations work on two
8540 registers-worth of operands in one go. Unfortunately we don't think we
8541 can exploit those extra calculations to do the full operation in fewer steps.
8542 Although all vector elements of the result but the first are ignored, we
8543 actually calculate the same result in each of the elements. An alternative
8544 such as initially loading a vector with zero to use as each of the second
8545 operands would use up an additional register and take an extra instruction,
8546 for no particular gain. */
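/* For example, a four-element sum reduction takes two steps:
     t = reduc (op1, op1) -- t[0] = x0+x1, t[1] = x2+x3, ...
     op0 = reduc (t, t)   -- op0[0] = x0+x1+x2+x3
   halving the number of live partial sums at each step. */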
8549 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8550 rtx (*reduc) (rtx, rtx, rtx))
8552 enum machine_mode inner = GET_MODE_INNER (mode);
8553 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8556 for (i = parts / 2; i >= 1; i /= 2)
8558 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8559 emit_insn (reduc (dest, tmpsum, tmpsum));
8564 /* If VALS is a vector constant that can be loaded into a register
8565 using VDUP, generate instructions to do so and return an RTX to
8566 assign to the register. Otherwise return NULL_RTX. */
8569 neon_vdup_constant (rtx vals)
8571 enum machine_mode mode = GET_MODE (vals);
8572 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8573 int n_elts = GET_MODE_NUNITS (mode);
8574 bool all_same = true;
8578 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8581 for (i = 0; i < n_elts; ++i)
8583 x = XVECEXP (vals, 0, i);
8584 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8589 /* The elements are not all the same. We could handle repeating
8590 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8591 {0, C, 0, C, 0, C, 0, C} which can be loaded using a VDUP with a larger element size). */
8595 /* We can load this constant by using VDUP and a constant in a
8596 single ARM register. This will be cheaper than a vector load. */
8599 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8600 return gen_rtx_VEC_DUPLICATE (mode, x);
8603 /* Generate code to load VALS, which is a PARALLEL containing only
8604 constants (for vec_init) or CONST_VECTOR, efficiently into a
8605 register. Returns an RTX to copy into the register, or NULL_RTX
8606 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8609 neon_make_constant (rtx vals)
8611 enum machine_mode mode = GET_MODE (vals);
8613 rtx const_vec = NULL_RTX;
8614 int n_elts = GET_MODE_NUNITS (mode);
8618 if (GET_CODE (vals) == CONST_VECTOR)
8620 else if (GET_CODE (vals) == PARALLEL)
8622 /* A CONST_VECTOR must contain only CONST_INTs and
8623 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8624 Only store valid constants in a CONST_VECTOR. */
8625 for (i = 0; i < n_elts; ++i)
8627 rtx x = XVECEXP (vals, 0, i);
8628 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8631 if (n_const == n_elts)
8632 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8637 if (const_vec != NULL
8638 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8639 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8641 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8642 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8643 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
8646 else if (const_vec != NULL_RTX)
8647 /* Load from constant pool. On Cortex-A8 this takes two cycles
8648 (for either double or quad vectors). We cannot take advantage
8649 of single-cycle VLD1 because we need a PC-relative addressing mode. */
8653 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8654 We cannot construct an initializer. */
8658 /* Initialize vector TARGET to VALS. */
8661 neon_expand_vector_init (rtx target, rtx vals)
8663 enum machine_mode mode = GET_MODE (target);
8664 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8665 int n_elts = GET_MODE_NUNITS (mode);
8666 int n_var = 0, one_var = -1;
8667 bool all_same = true;
8671 for (i = 0; i < n_elts; ++i)
8673 x = XVECEXP (vals, 0, i);
8674 if (!CONSTANT_P (x))
8675 ++n_var, one_var = i;
8677 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8683 rtx constant = neon_make_constant (vals);
8684 if (constant != NULL_RTX)
8686 emit_move_insn (target, constant);
8691 /* Splat a single non-constant element if we can. */
8692 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8694 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8695 emit_insn (gen_rtx_SET (VOIDmode, target,
8696 gen_rtx_VEC_DUPLICATE (mode, x)));
8700 /* One field is non-constant. Load constant then overwrite varying
8701 field. This is more efficient than using the stack. */
8704 rtx copy = copy_rtx (vals);
8705 rtx index = GEN_INT (one_var);
8707 /* Load constant part of vector, substitute neighboring value for the varying element. */
8709 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8710 neon_expand_vector_init (target, copy);
8712 /* Insert variable. */
8713 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8717 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8720 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8723 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8726 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8729 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8732 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8735 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8738 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8741 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8749 /* Construct the vector in memory one field at a time
8750 and load the whole vector. */
8751 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8752 for (i = 0; i < n_elts; i++)
8753 emit_move_insn (adjust_address_nv (mem, inner_mode,
8754 i * GET_MODE_SIZE (inner_mode)),
8755 XVECEXP (vals, 0, i));
8756 emit_move_insn (target, mem);
8759 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8760 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8761 reported source locations are bogus. */
8764 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8769 gcc_assert (GET_CODE (operand) == CONST_INT);
8771 lane = INTVAL (operand);
8773 if (lane < low || lane >= high)
8777 /* Bounds-check lanes. */
8780 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8782 bounds_check (operand, low, high, "lane out of range");
8785 /* Bounds-check constants. */
8788 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8790 bounds_check (operand, low, high, "constant out of range");
8794 neon_element_bits (enum machine_mode mode)
8797 return GET_MODE_BITSIZE (mode);
8799 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8803 /* Predicates for `match_operand' and `match_operator'. */
8805 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8807 cirrus_memory_offset (rtx op)
8809 /* Reject eliminable registers. */
8810 if (! (reload_in_progress || reload_completed)
8811 && ( reg_mentioned_p (frame_pointer_rtx, op)
8812 || reg_mentioned_p (arg_pointer_rtx, op)
8813 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8814 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8815 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8816 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8819 if (GET_CODE (op) == MEM)
8825 /* Match: (mem (reg)). */
8826 if (GET_CODE (ind) == REG)
8832 if (GET_CODE (ind) == PLUS
8833 && GET_CODE (XEXP (ind, 0)) == REG
8834 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8835 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8842 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8843 WB is true if full writeback address modes are allowed and is false
8844 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
8848 arm_coproc_mem_operand (rtx op, bool wb)
8852 /* Reject eliminable registers. */
8853 if (! (reload_in_progress || reload_completed)
8854 && ( reg_mentioned_p (frame_pointer_rtx, op)
8855 || reg_mentioned_p (arg_pointer_rtx, op)
8856 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8857 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8858 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8859 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8862 /* Constants are converted into offsets from labels. */
8863 if (GET_CODE (op) != MEM)
8868 if (reload_completed
8869 && (GET_CODE (ind) == LABEL_REF
8870 || (GET_CODE (ind) == CONST
8871 && GET_CODE (XEXP (ind, 0)) == PLUS
8872 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8873 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8876 /* Match: (mem (reg)). */
8877 if (GET_CODE (ind) == REG)
8878 return arm_address_register_rtx_p (ind, 0);
8880 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8881 acceptable in any case (subject to verification by
8882 arm_address_register_rtx_p). We need WB to be true to accept
8883 PRE_INC and POST_DEC. */
8884 if (GET_CODE (ind) == POST_INC
8885 || GET_CODE (ind) == PRE_DEC
8887 && (GET_CODE (ind) == PRE_INC
8888 || GET_CODE (ind) == POST_DEC)))
8889 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8892 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8893 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8894 && GET_CODE (XEXP (ind, 1)) == PLUS
8895 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8896 ind = XEXP (ind, 1);
8901 if (GET_CODE (ind) == PLUS
8902 && GET_CODE (XEXP (ind, 0)) == REG
8903 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8904 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8905 && INTVAL (XEXP (ind, 1)) > -1024
8906 && INTVAL (XEXP (ind, 1)) < 1024
8907 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
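/* Editor's illustration (not part of the original source): address
   forms accepted by arm_coproc_mem_operand above, assuming r4 is a
   valid base register:

     (mem (reg r4))                        plain register
     (mem (post_inc (reg r4)))             accepted for any WB
     (mem (pre_inc (reg r4)))              accepted only when WB is true
     (mem (plus (reg r4) (const_int 8)))   word-aligned offset in (-1024, 1024)  */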
8913 /* Return TRUE if OP is a memory operand which we can load or store a vector
8914 to/from. TYPE is one of the following values:
8915 0 - Vector load/store (vldr)
8916 1 - Core registers (ldm)
8917 2 - Element/structure loads (vld1)
8920 neon_vector_mem_operand (rtx op, int type)
8924 /* Reject eliminable registers. */
8925 if (! (reload_in_progress || reload_completed)
8926 && ( reg_mentioned_p (frame_pointer_rtx, op)
8927 || reg_mentioned_p (arg_pointer_rtx, op)
8928 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8929 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8930 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8931 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8934 /* Constants are converted into offsets from labels. */
8935 if (GET_CODE (op) != MEM)
8940 if (reload_completed
8941 && (GET_CODE (ind) == LABEL_REF
8942 || (GET_CODE (ind) == CONST
8943 && GET_CODE (XEXP (ind, 0)) == PLUS
8944 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8945 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8948 /* Match: (mem (reg)). */
8949 if (GET_CODE (ind) == REG)
8950 return arm_address_register_rtx_p (ind, 0);
8952 /* Allow post-increment with Neon registers. */
8953 if ((type != 1 && GET_CODE (ind) == POST_INC)
8954 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8955 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8957 /* FIXME: vld1 allows register post-modify. */
8963 && GET_CODE (ind) == PLUS
8964 && GET_CODE (XEXP (ind, 0)) == REG
8965 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8966 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8967 && INTVAL (XEXP (ind, 1)) > -1024
8968 && INTVAL (XEXP (ind, 1)) < 1016
8969 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
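/* Editor's note (not part of the original source): the range tested
   above accepts word-aligned offsets from -1020 to 1012 inclusive,
   slightly tighter than the (-1024, 1024) window used by
   arm_coproc_mem_operand earlier.  */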
8975 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8978 neon_struct_mem_operand (rtx op)
8982 /* Reject eliminable registers. */
8983 if (! (reload_in_progress || reload_completed)
8984 && ( reg_mentioned_p (frame_pointer_rtx, op)
8985 || reg_mentioned_p (arg_pointer_rtx, op)
8986 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8987 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8988 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8989 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8992 /* Constants are converted into offsets from labels. */
8993 if (GET_CODE (op) != MEM)
8998 if (reload_completed
8999 && (GET_CODE (ind) == LABEL_REF
9000 || (GET_CODE (ind) == CONST
9001 && GET_CODE (XEXP (ind, 0)) == PLUS
9002 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9003 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9006 /* Match: (mem (reg)). */
9007 if (GET_CODE (ind) == REG)
9008 return arm_address_register_rtx_p (ind, 0);
9013 /* Return true if X is a register that will be eliminated later on. */
9015 arm_eliminable_register (rtx x)
9017 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9018 || REGNO (x) == ARG_POINTER_REGNUM
9019 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9020 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9023 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9024 coprocessor registers. Otherwise return NO_REGS. */
9027 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9031 if (!TARGET_NEON_FP16)
9032 return GENERAL_REGS;
9033 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9035 return GENERAL_REGS;
9039 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9040 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9041 && neon_vector_mem_operand (x, 0))
9044 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9047 return GENERAL_REGS;
9050 /* Values which must be returned in the most-significant end of the return register. */
9054 arm_return_in_msb (const_tree valtype)
9056 return (TARGET_AAPCS_BASED
9058 && (AGGREGATE_TYPE_P (valtype)
9059 || TREE_CODE (valtype) == COMPLEX_TYPE));
9062 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9063 Used by the Cirrus Maverick code, which has to work around
9064 a hardware bug triggered by such instructions. */
9066 arm_memory_load_p (rtx insn)
9068 rtx body, lhs, rhs;
9070 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9073 body = PATTERN (insn);
9075 if (GET_CODE (body) != SET)
9078 lhs = XEXP (body, 0);
9079 rhs = XEXP (body, 1);
9081 lhs = REG_OR_SUBREG_RTX (lhs);
9083 /* If the destination is not a general purpose
9084 register we do not have to worry. */
9085 if (GET_CODE (lhs) != REG
9086 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9089 /* As well as loads from memory we also have to react
9090 to loads of invalid constants which will be turned
9091 into loads from the minipool. */
9092 return (GET_CODE (rhs) == MEM
9093 || GET_CODE (rhs) == SYMBOL_REF
9094 || note_invalid_constants (insn, -1, false));
9097 /* Return TRUE if INSN is a Cirrus instruction. */
9099 arm_cirrus_insn_p (rtx insn)
9101 enum attr_cirrus attr;
9103 /* get_attr cannot accept USE or CLOBBER. */
9105 || GET_CODE (insn) != INSN
9106 || GET_CODE (PATTERN (insn)) == USE
9107 || GET_CODE (PATTERN (insn)) == CLOBBER)
9110 attr = get_attr_cirrus (insn);
9112 return attr != CIRRUS_NOT;
9115 /* Cirrus reorg for invalid instruction combinations. */
9117 cirrus_reorg (rtx first)
9119 enum attr_cirrus attr;
9120 rtx body = PATTERN (first);
9124 /* Any branch must be followed by 2 non-Cirrus instructions. */
9125 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9128 t = next_nonnote_insn (first);
9130 if (arm_cirrus_insn_p (t))
9133 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9137 emit_insn_after (gen_nop (), first);
9142 /* (float (blah)) is in parallel with a clobber. */
9143 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9144 body = XVECEXP (body, 0, 0);
9146 if (GET_CODE (body) == SET)
9148 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9150 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9151 be followed by a non-Cirrus insn. */
9152 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9154 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9155 emit_insn_after (gen_nop (), first);
9159 else if (arm_memory_load_p (first))
9161 unsigned int arm_regno;
9163 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9164 ldr/cfmv64hr combination where the Rd field is the same
9165 in both instructions must be split with a non-Cirrus
9172 /* Get Arm register number for ldr insn. */
9173 if (GET_CODE (lhs) == REG)
9174 arm_regno = REGNO (lhs);
9177 gcc_assert (GET_CODE (rhs) == REG);
9178 arm_regno = REGNO (rhs);
9182 first = next_nonnote_insn (first);
9184 if (! arm_cirrus_insn_p (first))
9187 body = PATTERN (first);
9189 /* (float (blah)) is in parallel with a clobber. */
9190 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9191 body = XVECEXP (body, 0, 0);
9193 if (GET_CODE (body) == FLOAT)
9194 body = XEXP (body, 0);
9196 if (get_attr_cirrus (first) == CIRRUS_MOVE
9197 && GET_CODE (XEXP (body, 1)) == REG
9198 && arm_regno == REGNO (XEXP (body, 1)))
9199 emit_insn_after (gen_nop (), first);
9205 /* get_attr cannot accept USE or CLOBBER. */
9207 || GET_CODE (first) != INSN
9208 || GET_CODE (PATTERN (first)) == USE
9209 || GET_CODE (PATTERN (first)) == CLOBBER)
9212 attr = get_attr_cirrus (first);
9214 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9215 must be followed by a non-coprocessor instruction. */
9216 if (attr == CIRRUS_COMPARE)
9220 t = next_nonnote_insn (first);
9222 if (arm_cirrus_insn_p (t))
9225 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9229 emit_insn_after (gen_nop (), first);
9235 /* Return TRUE if X references a SYMBOL_REF. */
9237 symbol_mentioned_p (rtx x)
9242 if (GET_CODE (x) == SYMBOL_REF)
9245 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9246 are constant offsets, not symbols. */
9247 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9250 fmt = GET_RTX_FORMAT (GET_CODE (x));
9252 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9258 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9259 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9262 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9269 /* Return TRUE if X references a LABEL_REF. */
9271 label_mentioned_p (rtx x)
9276 if (GET_CODE (x) == LABEL_REF)
9279 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9280 instruction, but they are constant offsets, not symbols. */
9281 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9284 fmt = GET_RTX_FORMAT (GET_CODE (x));
9285 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9291 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9292 if (label_mentioned_p (XVECEXP (x, i, j)))
9295 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9303 tls_mentioned_p (rtx x)
9305 switch (GET_CODE (x))
9308 return tls_mentioned_p (XEXP (x, 0));
9311 if (XINT (x, 1) == UNSPEC_TLS)
9319 /* Must not copy any rtx that uses a pc-relative address. */
9322 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9324 if (GET_CODE (*x) == UNSPEC
9325 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9331 arm_cannot_copy_insn_p (rtx insn)
9333 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9339 enum rtx_code code = GET_CODE (x);
9356 /* Return 1 if memory locations are adjacent. */
9358 adjacent_mem_locations (rtx a, rtx b)
9360 /* We don't guarantee to preserve the order of these memory refs. */
9361 if (volatile_refs_p (a) || volatile_refs_p (b))
9364 if ((GET_CODE (XEXP (a, 0)) == REG
9365 || (GET_CODE (XEXP (a, 0)) == PLUS
9366 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9367 && (GET_CODE (XEXP (b, 0)) == REG
9368 || (GET_CODE (XEXP (b, 0)) == PLUS
9369 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9371 HOST_WIDE_INT val0 = 0, val1 = 0;
9375 if (GET_CODE (XEXP (a, 0)) == PLUS)
9377 reg0 = XEXP (XEXP (a, 0), 0);
9378 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9383 if (GET_CODE (XEXP (b, 0)) == PLUS)
9385 reg1 = XEXP (XEXP (b, 0), 0);
9386 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9391 /* Don't accept any offset that will require multiple
9392 instructions to handle, since this would cause the
9393 arith_adjacentmem pattern to output an overlong sequence. */
9394 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9397 /* Don't allow an eliminable register: register elimination can make
9398 the offset too large. */
9399 if (arm_eliminable_register (reg0))
9402 val_diff = val1 - val0;
9406 /* If the target has load delay slots, then there's no benefit
9407 to using an ldm instruction unless the offset is zero and
9408 we are optimizing for size. */
9409 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9410 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9411 && (val_diff == 4 || val_diff == -4));
9414 return ((REGNO (reg0) == REGNO (reg1))
9415 && (val_diff == 4 || val_diff == -4));
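/* Editor's worked example (not part of the original source):
   (mem (reg r2)) and (mem (plus (reg r2) (const_int 4))) are adjacent
   (same base, val_diff == 4), whereas offsets 0 and 8 from the same
   base are rejected because val_diff == 8.  */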
9421 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9422 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9423 instruction. ADD_OFFSET is nonzero if the base address register needs
9424 to be modified with an add instruction before we can use it. */
9427 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9428 int nops, HOST_WIDE_INT add_offset)
9430 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9431 if the offset isn't small enough. The reason 2 ldrs are faster
9432 is because these ARMs are able to do more than one cache access
9433 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9434 whilst the ARM8 has a double bandwidth cache. This means that
9435 these cores can do both an instruction fetch and a data fetch in
9436 a single cycle, so the trick of calculating the address into a
9437 scratch register (one of the result regs) and then doing a load
9438 multiple actually becomes slower (and no smaller in code size).
9439 That is the transformation
9441 ldr rd1, [rbase + offset]
9442 ldr rd2, [rbase + offset + 4]
9446 add rd1, rbase, offset
9447 ldmia rd1, {rd1, rd2}
9449 produces worse code -- '3 cycles + any stalls on rd2' instead of
9450 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9451 access per cycle, the first sequence could never complete in less
9452 than 6 cycles, whereas the ldm sequence would only take 5 and
9453 would make better use of sequential accesses if not hitting the
9456 We cheat here and test 'arm_ld_sched' which we currently know to
9457 only be true for the ARM8, ARM9 and StrongARM. If this ever
9458 changes, then the test below needs to be reworked. */
9459 if (nops == 2 && arm_ld_sched && add_offset != 0)
9462 /* XScale has load-store double instructions, but they have stricter
9463 alignment requirements than load-store multiple, so we cannot
9466 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9467 the pipeline until completion.
9475 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9484 Best case ldr will always win. However, the more ldr instructions
9485 we issue, the less likely we are to be able to schedule them well.
9486 Using ldr instructions also increases code size.
9488 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9489 for counts of 3 or 4 regs. */
9490 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9495 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9496 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9497 an array ORDER which describes the sequence to use when accessing the
9498 offsets that produces an ascending order. In this sequence, each
9499 offset must be larger by exactly 4 than the previous one. ORDER[0]
9500 must have been filled in with the lowest offset by the caller.
9501 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9502 we use to verify that ORDER produces an ascending order of registers.
9503 Return true if it was possible to construct such an order, false if not. */
9507 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9511 for (i = 1; i < nops; i++)
9515 order[i] = order[i - 1];
9516 for (j = 0; j < nops; j++)
9517 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9519 /* We must find exactly one offset that is higher than the
9520 previous one by 4. */
9521 if (order[i] != order[i - 1])
9525 if (order[i] == order[i - 1])
9527 /* The register numbers must be ascending. */
9528 if (unsorted_regs != NULL
9529 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
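/* Editor's worked example (not part of the original source): with
   unsorted_offsets = {4, 12, 0, 8} and order[0] preset to 2 (the index
   of offset 0), the loop above fills order = {2, 0, 3, 1}, visiting
   offsets 0, 4, 8, 12.  If any step fails to find exactly one offset
   4 larger than the previous one, the function returns false.  */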
9535 /* Used to determine in a peephole whether a sequence of load
9536 instructions can be changed into a load-multiple instruction.
9537 NOPS is the number of separate load instructions we are examining. The
9538 first NOPS entries in OPERANDS are the destination registers, the
9539 next NOPS entries are memory operands. If this function is
9540 successful, *BASE is set to the common base register of the memory
9541 accesses; *LOAD_OFFSET is set to the first memory location's offset
9542 from that base register.
9543 REGS is an array filled in with the destination register numbers.
9544 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9545 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9546 the sequence of registers in REGS matches the loads from ascending memory
9547 locations, and the function verifies that the register numbers are
9548 themselves ascending. If CHECK_REGS is false, the register numbers
9549 are stored in the order they are found in the operands. */
9551 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9552 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9554 int unsorted_regs[MAX_LDM_STM_OPS];
9555 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9556 int order[MAX_LDM_STM_OPS];
9557 rtx base_reg_rtx = NULL;
9561 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9562 easily extended if required. */
9563 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9565 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9567 /* Loop over the operands and check that the memory references are
9568 suitable (i.e. immediate offsets from the same base register). At
9569 the same time, extract the target register, and the memory offsets. */
9571 for (i = 0; i < nops; i++)
9576 /* Convert a subreg of a mem into the mem itself. */
9577 if (GET_CODE (operands[nops + i]) == SUBREG)
9578 operands[nops + i] = alter_subreg (operands + (nops + i));
9580 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9582 /* Don't reorder volatile memory references; it doesn't seem worth
9583 looking for the case where the order is ok anyway. */
9584 if (MEM_VOLATILE_P (operands[nops + i]))
9587 offset = const0_rtx;
9589 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9590 || (GET_CODE (reg) == SUBREG
9591 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9592 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9593 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9595 || (GET_CODE (reg) == SUBREG
9596 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9597 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9602 base_reg = REGNO (reg);
9604 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9607 else if (base_reg != (int) REGNO (reg))
9608 /* Not addressed from the same base register. */
9611 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9612 ? REGNO (operands[i])
9613 : REGNO (SUBREG_REG (operands[i])));
9615 /* If it isn't an integer register, or if it overwrites the
9616 base register but isn't the last insn in the list, then
9617 we can't do this. */
9618 if (unsorted_regs[i] < 0
9619 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9620 || unsorted_regs[i] > 14
9621 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9624 unsorted_offsets[i] = INTVAL (offset);
9625 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9629 /* Not a suitable memory address. */
9633 /* All the useful information has now been extracted from the
9634 operands into unsorted_regs and unsorted_offsets; additionally,
9635 order[0] has been set to the lowest offset in the list. Sort
9636 the offsets into order, verifying that they are adjacent, and
9637 check that the register numbers are ascending. */
9638 if (!compute_offset_order (nops, unsorted_offsets, order,
9639 check_regs ? unsorted_regs : NULL))
9643 memcpy (saved_order, order, sizeof order);
9649 for (i = 0; i < nops; i++)
9650 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9652 *load_offset = unsorted_offsets[order[0]];
9656 && !peep2_reg_dead_p (nops, base_reg_rtx))
9659 if (unsorted_offsets[order[0]] == 0)
9660 ldm_case = 1; /* ldmia */
9661 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9662 ldm_case = 2; /* ldmib */
9663 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9664 ldm_case = 3; /* ldmda */
9665 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9666 ldm_case = 4; /* ldmdb */
9667 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9668 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9673 if (!multiple_operation_profitable_p (false, nops,
9675 ? unsorted_offsets[order[0]] : 0))
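/* Editor's illustration (not part of the original source): for the
   two-insn peephole

       ldr r0, [r4]
       ldr r1, [r4, #4]

   load_multiple_sequence fills regs = {0, 1}, sets *base to r4's
   register number and *load_offset to 0, and returns 1 (ldmia),
   provided multiple_operation_profitable_p agrees for the target.  */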
9681 /* Used to determine in a peephole whether a sequence of store instructions can
9682 be changed into a store-multiple instruction.
9683 NOPS is the number of separate store instructions we are examining.
9684 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9686 The first NOPS entries in OPERANDS are the source registers, the next
9687 NOPS entries are memory operands. If this function is successful, *BASE is
9688 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9689 to the first memory location's offset from that base register. REGS is an
9690 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9691 likewise filled with the corresponding rtx's.
9692 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9693 numbers to an ascending order of stores.
9694 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9695 from ascending memory locations, and the function verifies that the register
9696 numbers are themselves ascending. If CHECK_REGS is false, the register
9697 numbers are stored in the order they are found in the operands. */
9699 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9700 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9701 HOST_WIDE_INT *load_offset, bool check_regs)
9703 int unsorted_regs[MAX_LDM_STM_OPS];
9704 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9705 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9706 int order[MAX_LDM_STM_OPS];
9708 rtx base_reg_rtx = NULL;
9711 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9712 easily extended if required. */
9713 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9715 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9717 /* Loop over the operands and check that the memory references are
9718 suitable (i.e. immediate offsets from the same base register). At
9719 the same time, extract the target register, and the memory offsets. */
9721 for (i = 0; i < nops; i++)
9726 /* Convert a subreg of a mem into the mem itself. */
9727 if (GET_CODE (operands[nops + i]) == SUBREG)
9728 operands[nops + i] = alter_subreg (operands + (nops + i));
9730 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9732 /* Don't reorder volatile memory references; it doesn't seem worth
9733 looking for the case where the order is ok anyway. */
9734 if (MEM_VOLATILE_P (operands[nops + i]))
9737 offset = const0_rtx;
9739 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9740 || (GET_CODE (reg) == SUBREG
9741 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9742 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9743 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9745 || (GET_CODE (reg) == SUBREG
9746 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9747 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9750 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9751 ? operands[i] : SUBREG_REG (operands[i]));
9752 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9756 base_reg = REGNO (reg);
9758 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9761 else if (base_reg != (int) REGNO (reg))
9762 /* Not addressed from the same base register. */
9765 /* If it isn't an integer register, then we can't do this. */
9766 if (unsorted_regs[i] < 0
9767 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9768 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9769 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9770 || unsorted_regs[i] > 14)
9773 unsorted_offsets[i] = INTVAL (offset);
9774 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9778 /* Not a suitable memory address. */
9782 /* All the useful information has now been extracted from the
9783 operands into unsorted_regs and unsorted_offsets; additionally,
9784 order[0] has been set to the lowest offset in the list. Sort
9785 the offsets into order, verifying that they are adjacent, and
9786 check that the register numbers are ascending. */
9787 if (!compute_offset_order (nops, unsorted_offsets, order,
9788 check_regs ? unsorted_regs : NULL))
9792 memcpy (saved_order, order, sizeof order);
9798 for (i = 0; i < nops; i++)
9800 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9802 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9805 *load_offset = unsorted_offsets[order[0]];
9809 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9812 if (unsorted_offsets[order[0]] == 0)
9813 stm_case = 1; /* stmia */
9814 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9815 stm_case = 2; /* stmib */
9816 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9817 stm_case = 3; /* stmda */
9818 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9819 stm_case = 4; /* stmdb */
9823 if (!multiple_operation_profitable_p (false, nops, 0))
9829 /* Routines for use in generating RTL. */
9831 /* Generate a load-multiple instruction. COUNT is the number of loads in
9832 the instruction; REGS and MEMS are arrays containing the operands.
9833 BASEREG is the base register to be used in addressing the memory operands.
9834 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9838 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9839 HOST_WIDE_INT wback_offset)
9844 if (!multiple_operation_profitable_p (false, count, 0))
9850 for (i = 0; i < count; i++)
9851 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9853 if (wback_offset != 0)
9854 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9862 result = gen_rtx_PARALLEL (VOIDmode,
9863 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9864 if (wback_offset != 0)
9866 XVECEXP (result, 0, 0)
9867 = gen_rtx_SET (VOIDmode, basereg,
9868 plus_constant (basereg, wback_offset));
9873 for (j = 0; i < count; i++, j++)
9874 XVECEXP (result, 0, i)
9875 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9880 /* Generate a store-multiple instruction. COUNT is the number of stores in
9881 the instruction; REGS and MEMS are arrays containing the operands.
9882 BASEREG is the base register to be used in addressing the memory operands.
9883 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9887 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9888 HOST_WIDE_INT wback_offset)
9893 if (GET_CODE (basereg) == PLUS)
9894 basereg = XEXP (basereg, 0);
9896 if (!multiple_operation_profitable_p (false, count, 0))
9902 for (i = 0; i < count; i++)
9903 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9905 if (wback_offset != 0)
9906 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9914 result = gen_rtx_PARALLEL (VOIDmode,
9915 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9916 if (wback_offset != 0)
9918 XVECEXP (result, 0, 0)
9919 = gen_rtx_SET (VOIDmode, basereg,
9920 plus_constant (basereg, wback_offset));
9925 for (j = 0; i < count; i++, j++)
9926 XVECEXP (result, 0, i)
9927 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9932 /* Generate either a load-multiple or a store-multiple instruction. This
9933 function can be used in situations where we can start with a single MEM
9934 rtx and adjust its address upwards.
9935 COUNT is the number of operations in the instruction, not counting a
9936 possible update of the base register. REGS is an array containing the
9938 BASEREG is the base register to be used in addressing the memory operands,
9939 which are constructed from BASEMEM.
9940 WRITE_BACK specifies whether the generated instruction should include an
9941 update of the base register.
9942 OFFSETP is used to pass an offset to and from this function; this offset
9943 is not used when constructing the address (instead BASEMEM should have an
9944 appropriate offset in its address); it is used only for setting
9945 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9948 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9949 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9951 rtx mems[MAX_LDM_STM_OPS];
9952 HOST_WIDE_INT offset = *offsetp;
9955 gcc_assert (count <= MAX_LDM_STM_OPS);
9957 if (GET_CODE (basereg) == PLUS)
9958 basereg = XEXP (basereg, 0);
9960 for (i = 0; i < count; i++)
9962 rtx addr = plus_constant (basereg, i * 4);
9963 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9971 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9972 write_back ? 4 * count : 0);
9974 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9975 write_back ? 4 * count : 0);
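/* Editor's sketch (not part of the original source): a typical use of
   the wrappers below, mirroring the call made in arm_gen_movmemqi
   later in this file.  SRC and SRCBASE stand for a source address
   register and the MEM it was taken from.  */
#if 0
  {
    HOST_WIDE_INT srcoffset = 0;
    /* Load r0-r3 from SRC, update SRC by 16, and advance srcoffset so
       later MEMs carry accurate MEM_OFFSETs.  */
    emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                      TRUE, srcbase, &srcoffset));
  }
#endif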
9979 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9980 rtx basemem, HOST_WIDE_INT *offsetp)
9982 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9987 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9988 rtx basemem, HOST_WIDE_INT *offsetp)
9990 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9994 /* Called from a peephole2 expander to turn a sequence of loads into an
9995 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9996 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9997 is true if we can reorder the registers because they are used commutatively
9999 Returns true iff we could generate a new instruction. */
10002 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10004 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10005 rtx mems[MAX_LDM_STM_OPS];
10006 int i, j, base_reg;
10008 HOST_WIDE_INT offset;
10009 int write_back = FALSE;
10013 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10014 &base_reg, &offset, !sort_regs);
10020 for (i = 0; i < nops - 1; i++)
10021 for (j = i + 1; j < nops; j++)
10022 if (regs[i] > regs[j])
10028 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10032 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10033 gcc_assert (ldm_case == 1 || ldm_case == 5);
10039 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10040 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10042 if (!TARGET_THUMB1)
10044 base_reg = regs[0];
10045 base_reg_rtx = newbase;
10049 for (i = 0; i < nops; i++)
10051 addr = plus_constant (base_reg_rtx, offset + i * 4);
10052 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10055 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10056 write_back ? offset + i * 4 : 0));
10060 /* Called from a peephole2 expander to turn a sequence of stores into an
10061 STM instruction. OPERANDS are the operands found by the peephole matcher;
10062 NOPS indicates how many separate stores we are trying to combine.
10063 Returns true iff we could generate a new instruction. */
10066 gen_stm_seq (rtx *operands, int nops)
10069 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10070 rtx mems[MAX_LDM_STM_OPS];
10073 HOST_WIDE_INT offset;
10074 int write_back = FALSE;
10077 bool base_reg_dies;
10079 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10080 mem_order, &base_reg, &offset, true);
10085 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10087 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10090 gcc_assert (base_reg_dies);
10096 gcc_assert (base_reg_dies);
10097 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10101 addr = plus_constant (base_reg_rtx, offset);
10103 for (i = 0; i < nops; i++)
10105 addr = plus_constant (base_reg_rtx, offset + i * 4);
10106 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10109 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10110 write_back ? offset + i * 4 : 0));
10114 /* Called from a peephole2 expander to turn a sequence of stores that are
10115 preceded by constant loads into an STM instruction. OPERANDS are the
10116 operands found by the peephole matcher; NOPS indicates how many
10117 separate stores we are trying to combine; there are 2 * NOPS
10118 instructions in the peephole.
10119 Returns true iff we could generate a new instruction. */
10122 gen_const_stm_seq (rtx *operands, int nops)
10124 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10125 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10126 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10127 rtx mems[MAX_LDM_STM_OPS];
10130 HOST_WIDE_INT offset;
10131 int write_back = FALSE;
10134 bool base_reg_dies;
10136 HARD_REG_SET allocated;
10138 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10139 mem_order, &base_reg, &offset, false);
10144 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10146 /* If the same register is used more than once, try to find a free register. */
10148 CLEAR_HARD_REG_SET (allocated);
10149 for (i = 0; i < nops; i++)
10151 for (j = i + 1; j < nops; j++)
10152 if (regs[i] == regs[j])
10154 rtx t = peep2_find_free_register (0, nops * 2,
10155 TARGET_THUMB1 ? "l" : "r",
10156 SImode, &allocated);
10160 regs[i] = REGNO (t);
10164 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10167 for (i = 0; i < nops; i++)
10168 if (regs[i] < regs[reg_order[0]])
10171 for (i = 1; i < nops; i++)
10173 int this_order = reg_order[i - 1];
10174 for (j = 0; j < nops; j++)
10175 if (regs[j] > regs[reg_order[i - 1]]
10176 && (this_order == reg_order[i - 1]
10177 || regs[j] < regs[this_order]))
10179 reg_order[i] = this_order;
10182 /* Ensure that registers that must be live after the instruction end
10183 up with the correct value. */
10184 for (i = 0; i < nops; i++)
10186 int this_order = reg_order[i];
10187 if ((this_order != mem_order[i]
10188 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10189 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10193 /* Load the constants. */
10194 for (i = 0; i < nops; i++)
10196 rtx op = operands[2 * nops + mem_order[i]];
10197 sorted_regs[i] = regs[reg_order[i]];
10198 emit_move_insn (reg_rtxs[reg_order[i]], op);
10201 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10203 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10206 gcc_assert (base_reg_dies);
10212 gcc_assert (base_reg_dies);
10213 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10217 addr = plus_constant (base_reg_rtx, offset);
10219 for (i = 0; i < nops; i++)
10221 addr = plus_constant (base_reg_rtx, offset + i * 4);
10222 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10225 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10226 write_back ? offset + i * 4 : 0));
10231 arm_gen_movmemqi (rtx *operands)
10233 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10234 HOST_WIDE_INT srcoffset, dstoffset;
10236 rtx src, dst, srcbase, dstbase;
10237 rtx part_bytes_reg = NULL;
10240 if (GET_CODE (operands[2]) != CONST_INT
10241 || GET_CODE (operands[3]) != CONST_INT
10242 || INTVAL (operands[2]) > 64
10243 || INTVAL (operands[3]) & 3)
10246 dstbase = operands[0];
10247 srcbase = operands[1];
10249 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10250 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10252 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10253 out_words_to_go = INTVAL (operands[2]) / 4;
10254 last_bytes = INTVAL (operands[2]) & 3;
10255 dstoffset = srcoffset = 0;
10257 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10258 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10260 for (i = 0; in_words_to_go >= 2; i += 4)
10262 if (in_words_to_go > 4)
10263 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10264 TRUE, srcbase, &srcoffset));
10266 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10267 src, FALSE, srcbase,
10270 if (out_words_to_go)
10272 if (out_words_to_go > 4)
10273 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10274 TRUE, dstbase, &dstoffset));
10275 else if (out_words_to_go != 1)
10276 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10277 out_words_to_go, dst,
10280 dstbase, &dstoffset));
10283 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10284 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10285 if (last_bytes != 0)
10287 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10293 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10294 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10297 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10298 if (out_words_to_go)
10302 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10303 sreg = copy_to_reg (mem);
10305 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10306 emit_move_insn (mem, sreg);
10309 gcc_assert (!in_words_to_go); /* Sanity check */
10312 if (in_words_to_go)
10314 gcc_assert (in_words_to_go > 0);
10316 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10317 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10320 gcc_assert (!last_bytes || part_bytes_reg);
10322 if (BYTES_BIG_ENDIAN && last_bytes)
10324 rtx tmp = gen_reg_rtx (SImode);
10326 /* The bytes we want are in the top end of the word. */
10327 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10328 GEN_INT (8 * (4 - last_bytes))));
10329 part_bytes_reg = tmp;
10333 mem = adjust_automodify_address (dstbase, QImode,
10334 plus_constant (dst, last_bytes - 1),
10335 dstoffset + last_bytes - 1);
10336 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10340 tmp = gen_reg_rtx (SImode);
10341 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10342 part_bytes_reg = tmp;
10349 if (last_bytes > 1)
10351 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10352 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10356 rtx tmp = gen_reg_rtx (SImode);
10357 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10358 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10359 part_bytes_reg = tmp;
10366 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10367 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
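/* Editor's worked example (not part of the original source): a
   14-byte, word-aligned copy gives in_words_to_go = 4 (ARM_NUM_INTS
   rounds up), out_words_to_go = 3 and last_bytes = 2: four words are
   loaded, three are stored whole, and the trailing two bytes travel
   through part_bytes_reg and the HImode store above.  */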
10374 /* Select a dominance comparison mode if possible for a test of the general
10375 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10376 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10377 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10378 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10379 In all cases OP will be either EQ or NE, but we don't need to know which
10380 here. If we are unable to support a dominance comparison we return
10381 CC mode. This will then fail to match for the RTL expressions that
10382 generate this call. */
10384 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10386 enum rtx_code cond1, cond2;
10389 /* Currently we will probably get the wrong result if the individual
10390 comparisons are not simple. This also ensures that it is safe to
10391 reverse a comparison if necessary. */
10392 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10394 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10398 /* The if_then_else variant of this tests the second condition if the
10399 first passes, but is true if the first fails. Reverse the first
10400 condition to get a true "inclusive-or" expression. */
10401 if (cond_or == DOM_CC_NX_OR_Y)
10402 cond1 = reverse_condition (cond1);
10404 /* If the comparisons are not equal, and one doesn't dominate the other,
10405 then we can't do this. */
10407 && !comparison_dominates_p (cond1, cond2)
10408 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10413 enum rtx_code temp = cond1;
10421 if (cond_or == DOM_CC_X_AND_Y)
10426 case EQ: return CC_DEQmode;
10427 case LE: return CC_DLEmode;
10428 case LEU: return CC_DLEUmode;
10429 case GE: return CC_DGEmode;
10430 case GEU: return CC_DGEUmode;
10431 default: gcc_unreachable ();
10435 if (cond_or == DOM_CC_X_AND_Y)
10447 gcc_unreachable ();
10451 if (cond_or == DOM_CC_X_AND_Y)
10463 gcc_unreachable ();
10467 if (cond_or == DOM_CC_X_AND_Y)
10468 return CC_DLTUmode;
10473 return CC_DLTUmode;
10475 return CC_DLEUmode;
10479 gcc_unreachable ();
10483 if (cond_or == DOM_CC_X_AND_Y)
10484 return CC_DGTUmode;
10489 return CC_DGTUmode;
10491 return CC_DGEUmode;
10495 gcc_unreachable ();
10498 /* The remaining cases only occur when both comparisons are the same. */
10501 gcc_assert (cond1 == cond2);
10505 gcc_assert (cond1 == cond2);
10509 gcc_assert (cond1 == cond2);
10513 gcc_assert (cond1 == cond2);
10514 return CC_DLEUmode;
10517 gcc_assert (cond1 == cond2);
10518 return CC_DGEUmode;
10521 gcc_unreachable ();
10526 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10528 /* All floating point compares return CCFP if it is an equality
10529 comparison, and CCFPE otherwise. */
10530 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10550 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10555 gcc_unreachable ();
10559 /* A compare with a shifted operand. Because of canonicalization, the
10560 comparison will have to be swapped when we emit the assembler. */
10561 if (GET_MODE (y) == SImode
10562 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10563 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10564 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10565 || GET_CODE (x) == ROTATERT))
10568 /* This operation is performed swapped, but since we only rely on the Z
10569 flag we don't need an additional mode. */
10570 if (GET_MODE (y) == SImode
10571 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10572 && GET_CODE (x) == NEG
10573 && (op == EQ || op == NE))
10576 /* This is a special case that is used by combine to allow a
10577 comparison of a shifted byte load to be split into a zero-extend
10578 followed by a comparison of the shifted integer (only valid for
10579 equalities and unsigned inequalities). */
10580 if (GET_MODE (x) == SImode
10581 && GET_CODE (x) == ASHIFT
10582 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10583 && GET_CODE (XEXP (x, 0)) == SUBREG
10584 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10585 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10586 && (op == EQ || op == NE
10587 || op == GEU || op == GTU || op == LTU || op == LEU)
10588 && GET_CODE (y) == CONST_INT)
10591 /* A construct for a conditional compare: if the false arm contains
10592 0, then both conditions must be true; otherwise either condition
10593 must be true. Not all conditions are possible, so CCmode is
10594 returned if it can't be done. */
10595 if (GET_CODE (x) == IF_THEN_ELSE
10596 && (XEXP (x, 2) == const0_rtx
10597 || XEXP (x, 2) == const1_rtx)
10598 && COMPARISON_P (XEXP (x, 0))
10599 && COMPARISON_P (XEXP (x, 1)))
10600 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10601 INTVAL (XEXP (x, 2)));
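/* Editor's note (not part of the original source): a false arm of
   (const_int 0) requires both comparisons to hold (DOM_CC_X_AND_Y,
   value 0), while (const_int 1) selects the "true if the first fails"
   form (DOM_CC_NX_OR_Y) described before arm_select_dominance_cc_mode.  */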
10603 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10604 if (GET_CODE (x) == AND
10605 && COMPARISON_P (XEXP (x, 0))
10606 && COMPARISON_P (XEXP (x, 1)))
10607 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10610 if (GET_CODE (x) == IOR
10611 && COMPARISON_P (XEXP (x, 0))
10612 && COMPARISON_P (XEXP (x, 1)))
10613 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10616 /* An operation (on Thumb) where we want to test for a single bit.
10617 This is done by shifting that bit up into the top bit of a
10618 scratch register; we can then branch on the sign bit. */
10620 && GET_MODE (x) == SImode
10621 && (op == EQ || op == NE)
10622 && GET_CODE (x) == ZERO_EXTRACT
10623 && XEXP (x, 1) == const1_rtx)
10626 /* An operation that sets the condition codes as a side-effect does
10627 not set the V flag correctly, so we can only use comparisons where
10628 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
10630 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10631 if (GET_MODE (x) == SImode
10633 && (op == EQ || op == NE || op == LT || op == GE)
10634 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10635 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10636 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10637 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10638 || GET_CODE (x) == LSHIFTRT
10639 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10640 || GET_CODE (x) == ROTATERT
10641 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10642 return CC_NOOVmode;
10644 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10647 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10648 && GET_CODE (x) == PLUS
10649 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10652 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10654 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
10656 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10663 /* A DImode comparison against zero can be implemented by
10664 or'ing the two halves together. */
10665 if (y == const0_rtx)
10668 /* We can do an equality test in three Thumb instructions. */
10678 /* DImode unsigned comparisons can be implemented by cmp +
10679 cmpeq without a scratch register. Not worth doing in Thumb-2. */
10690 /* DImode signed and unsigned comparisons can be implemented
10691 by cmp + sbcs with a scratch register, but that does not
10692 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10693 gcc_assert (op != EQ && op != NE);
10697 gcc_unreachable ();
10704 /* X and Y are two things to compare using CODE. Emit the compare insn and
10705 return the rtx for register 0 in the proper mode. */
10708 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10710 enum machine_mode mode;
10712 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10714 /* We might have X as a constant, Y as a register because of the predicates
10715 used for cmpdi. If so, force X to a register here. */
10716 if (dimode_comparison && !REG_P (x))
10717 x = force_reg (DImode, x);
10719 mode = SELECT_CC_MODE (code, x, y);
10720 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10722 if (dimode_comparison
10723 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10724 && mode != CC_CZmode)
10728 /* To compare two non-zero values for equality, XOR them and
10729 then compare against zero. Not used for ARM mode; there
10730 CC_CZmode is cheaper. */
10731 if (mode == CC_Zmode && y != const0_rtx)
10733 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10736 /* A scratch register is required. */
10737 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10738 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10739 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10742 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10747 /* Generate a sequence of insns that will generate the correct return
10748 address mask depending on the physical architecture that the program is running on. */
10751 arm_gen_return_addr_mask (void)
10753 rtx reg = gen_reg_rtx (Pmode);
10755 emit_insn (gen_return_addr_mask (reg));
10760 arm_reload_in_hi (rtx *operands)
10762 rtx ref = operands[1];
10764 HOST_WIDE_INT offset = 0;
10766 if (GET_CODE (ref) == SUBREG)
10768 offset = SUBREG_BYTE (ref);
10769 ref = SUBREG_REG (ref);
10772 if (GET_CODE (ref) == REG)
10774 /* We have a pseudo which has been spilt onto the stack; there
10775 are two cases here: the first where there is a simple
10776 stack-slot replacement and a second where the stack-slot is
10777 out of range, or is used as a subreg. */
10778 if (reg_equiv_mem[REGNO (ref)])
10780 ref = reg_equiv_mem[REGNO (ref)];
10781 base = find_replacement (&XEXP (ref, 0));
10784 /* The slot is out of range, or was dressed up in a SUBREG. */
10785 base = reg_equiv_address[REGNO (ref)];
10788 base = find_replacement (&XEXP (ref, 0));
10790 /* Handle the case where the address is too complex to be offset by 1. */
10791 if (GET_CODE (base) == MINUS
10792 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10794 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10796 emit_set_insn (base_plus, base);
10799 else if (GET_CODE (base) == PLUS)
10801 /* The addend must be CONST_INT, or we would have dealt with it above. */
10802 HOST_WIDE_INT hi, lo;
10804 offset += INTVAL (XEXP (base, 1));
10805 base = XEXP (base, 0);
10807 /* Rework the address into a legal sequence of insns. */
10808 /* Valid range for lo is -4095 -> 4095 */
10811 : -((-offset) & 0xfff));
10813 /* Corner case: if lo is the max offset then we would be out of range
10814 once we have added the additional 1 below, so bump the msb into the
10815 pre-loading insn(s). */
10819 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10820 ^ (HOST_WIDE_INT) 0x80000000)
10821 - (HOST_WIDE_INT) 0x80000000);
10823 gcc_assert (hi + lo == offset);
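/* Editor's worked example (not part of the original source): for
   offset = 5000, lo = 5000 & 0xfff = 904 and hi = 4096, so the addsi3
   below materializes base + 4096 while the byte accesses use the small
   in-range offsets 904 and 905.  */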
10827 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10829 /* Get the base address; addsi3 knows how to handle constants
10830 that require more than one insn. */
10831 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10837 /* Operands[2] may overlap operands[0] (though it won't overlap
10838 operands[1]); that's why we asked for a DImode reg -- so we can
10839 use the bit that does not overlap. */
10840 if (REGNO (operands[2]) == REGNO (operands[0]))
10841 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10843 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10845 emit_insn (gen_zero_extendqisi2 (scratch,
10846 gen_rtx_MEM (QImode,
10847 plus_constant (base,
10849 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10850 gen_rtx_MEM (QImode,
10851 plus_constant (base,
10853 if (!BYTES_BIG_ENDIAN)
10854 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10855 gen_rtx_IOR (SImode,
10858 gen_rtx_SUBREG (SImode, operands[0], 0),
10862 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10863 gen_rtx_IOR (SImode,
10864 gen_rtx_ASHIFT (SImode, scratch,
10866 gen_rtx_SUBREG (SImode, operands[0], 0)));
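/* Editor's note (not part of the original source): on a little-endian
   target the sequence above builds

     operands[0] = (byte at BASE+1) << 8 | (byte at BASE)

   so the halfword is reloaded with two byte loads, a shift and an IOR,
   never issuing a possibly-misaligned halfword access.  */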
10869 /* Handle storing a half-word to memory during reload by synthesizing as two
10870 byte stores. Take care not to clobber the input values until after we
10871 have moved them somewhere safe. This code assumes that if the DImode
10872 scratch in operands[2] overlaps either the input value or output address
10873 in some way, then that value must die in this insn (we absolutely need
10874 two scratch registers for some corner cases). */
10876 arm_reload_out_hi (rtx *operands)
10878 rtx ref = operands[0];
10879 rtx outval = operands[1];
10881 HOST_WIDE_INT offset = 0;
10883 if (GET_CODE (ref) == SUBREG)
10885 offset = SUBREG_BYTE (ref);
10886 ref = SUBREG_REG (ref);
10889 if (GET_CODE (ref) == REG)
10891 /* We have a pseudo which has been spilt onto the stack; there
10892 are two cases here: the first where there is a simple
10893 stack-slot replacement and a second where the stack-slot is
10894 out of range, or is used as a subreg. */
10895 if (reg_equiv_mem[REGNO (ref)])
10897 ref = reg_equiv_mem[REGNO (ref)];
10898 base = find_replacement (&XEXP (ref, 0));
10901 /* The slot is out of range, or was dressed up in a SUBREG. */
10902 base = reg_equiv_address[REGNO (ref)];
10905 base = find_replacement (&XEXP (ref, 0));
10907 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10909 /* Handle the case where the address is too complex to be offset by 1. */
10910 if (GET_CODE (base) == MINUS
10911 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10913 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10915 /* Be careful not to destroy OUTVAL. */
10916 if (reg_overlap_mentioned_p (base_plus, outval))
10918 /* Updating base_plus might destroy outval; see if we can
10919 swap the scratch and base_plus. */
10920 if (!reg_overlap_mentioned_p (scratch, outval))
10923 scratch = base_plus;
10928 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10930 /* Be conservative and copy OUTVAL into the scratch now;
10931 this should only be necessary if outval is a subreg
10932 of something larger than a word. */
10933 /* XXX Might this clobber base? I can't see how it can,
10934 since scratch is known to overlap with OUTVAL, and
10935 must be wider than a word. */
10936 emit_insn (gen_movhi (scratch_hi, outval));
10937 outval = scratch_hi;
10941 emit_set_insn (base_plus, base);
10944 else if (GET_CODE (base) == PLUS)
10946 /* The addend must be CONST_INT, or we would have dealt with it above. */
10947 HOST_WIDE_INT hi, lo;
10949 offset += INTVAL (XEXP (base, 1));
10950 base = XEXP (base, 0);
10952 /* Rework the address into a legal sequence of insns. */
10953 /* Valid range for lo is -4095 -> 4095 */
10956 : -((-offset) & 0xfff));
10958 /* Corner case: if lo is the max offset then we would be out of range
10959 once we have added the additional 1 below, so bump the msb into the
10960 pre-loading insn(s). */
10964 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10965 ^ (HOST_WIDE_INT) 0x80000000)
10966 - (HOST_WIDE_INT) 0x80000000);
10968 gcc_assert (hi + lo == offset);
10972 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10974 /* Be careful not to destroy OUTVAL. */
10975 if (reg_overlap_mentioned_p (base_plus, outval))
10977 /* Updating base_plus might destroy outval; see if we
10978 can swap the scratch and base_plus. */
10979 if (!reg_overlap_mentioned_p (scratch, outval))
10982 scratch = base_plus;
10987 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10989 /* Be conservative and copy outval into scratch now;
10990 this should only be necessary if outval is a
10991 subreg of something larger than a word. */
10992 /* XXX Might this clobber base? I can't see how it
10993 can, since scratch is known to overlap with
10995 emit_insn (gen_movhi (scratch_hi, outval));
10996 outval = scratch_hi;
11000 /* Get the base address; addsi3 knows how to handle constants
11001 that require more than one insn. */
11002 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11008 if (BYTES_BIG_ENDIAN)
11010 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11011 plus_constant (base, offset + 1)),
11012 gen_lowpart (QImode, outval)));
11013 emit_insn (gen_lshrsi3 (scratch,
11014 gen_rtx_SUBREG (SImode, outval, 0),
11016 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11017 gen_lowpart (QImode, scratch)));
11021 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11022 gen_lowpart (QImode, outval)));
11023 emit_insn (gen_lshrsi3 (scratch,
11024 gen_rtx_SUBREG (SImode, outval, 0),
11026 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11027 plus_constant (base, offset + 1)),
11028 gen_lowpart (QImode, scratch)));
11032 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11033 (padded to the size of a word) should be passed in a register. */
11036 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11038 if (TARGET_AAPCS_BASED)
11039 return must_pass_in_stack_var_size (mode, type);
11041 return must_pass_in_stack_var_size_or_pad (mode, type);
11045 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11046 Return true if an argument passed on the stack should be padded upwards,
11047 i.e. if the least-significant byte has useful data.
11048 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11049 aggregate types are placed in the lowest memory address. */
11052 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11054 if (!TARGET_AAPCS_BASED)
11055 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11057 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11064 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11065 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11066 byte of the register has useful data, and return the opposite if the
11067 most significant byte does.
11068 For AAPCS, small aggregates and small complex types are always padded
11072 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11073 tree type, int first ATTRIBUTE_UNUSED)
11075 if (TARGET_AAPCS_BASED
11076 && BYTES_BIG_ENDIAN
11077 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11078 && int_size_in_bytes (type) <= 4)
11081 /* Otherwise, use default padding. */
11082 return !BYTES_BIG_ENDIAN;
11086 /* Print a symbolic form of X to the debug file, F. */
11088 arm_print_value (FILE *f, rtx x)
11090 switch (GET_CODE (x))
11093 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11097 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11105 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11107 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11108 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11116 fprintf (f, "\"%s\"", XSTR (x, 0));
11120 fprintf (f, "`%s'", XSTR (x, 0));
11124 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11128 arm_print_value (f, XEXP (x, 0));
11132 arm_print_value (f, XEXP (x, 0));
11134 arm_print_value (f, XEXP (x, 1));
11142 fprintf (f, "????");
11147 /* Routines for manipulation of the constant pool. */
/* ARM instructions cannot load a large constant directly into a
11150 register; they have to come from a pc relative load. The constant
11151 must therefore be placed in the addressable range of the pc
11152 relative load. Depending on the precise pc relative load
11153 instruction the range is somewhere between 256 bytes and 4k. This
11154 means that we often have to dump a constant inside a function, and
11155 generate code to branch around it.
11157 It is important to minimize this, since the branches will slow
11158 things down and make the code larger.
11160 Normally we can hide the table after an existing unconditional
11161 branch so that there is no interruption of the flow, but in the
worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
L1:	.long	value
L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
L3:	.long	value
L4:
	...
11180 We fix this by performing a scan after scheduling, which notices
11181 which instructions need to have their operands fetched from the
11182 constant table and builds the table.
11184 The algorithm starts by building a table of all the constants that
11185 need fixing up and all the natural barriers in the function (places
11186 where a constant table can be dropped without breaking the flow).
11187 For each fixup we note how far the pc-relative replacement will be
11188 able to reach and the offset of the instruction into the function.
11190 Having built the table we then group the fixes together to form
11191 tables that are as large as possible (subject to addressing
11192 constraints) and emit each table of constants after the last
11193 barrier that is within range of all the instructions in the group.
11194 If a group does not contain a barrier, then we forcibly create one
11195 by inserting a jump instruction into the flow. Once the table has
11196 been inserted, the insns are then modified to reference the
11197 relevant entry in the pool.
11199 Possible enhancements to the algorithm (not implemented) are:
11201 1) For some processors and object formats, there may be benefit in
11202 aligning the pools to the start of cache lines; this alignment
would need to be taken into account when calculating addressability of a pool.  */
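/* As a rough illustration of the bookkeeping (numbers invented for the
sketch): a 4-byte fix at function offset 1000 whose load can reach
4096 bytes forwards gets max_address = 1000 + 4096 - minipool_pad, and
the pool that ends up holding it must be emitted at a barrier below
the tightest such bound of every fix in its group.  */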
11206 /* These typedefs are located at the start of this file, so that
11207 they can be used in the prototypes there. This comment is to
11208 remind readers of that fact so that the following structures
11209 can be understood more easily.
11211 typedef struct minipool_node Mnode;
11212 typedef struct minipool_fixup Mfix; */
11214 struct minipool_node
11216 /* Doubly linked chain of entries. */
11219 /* The maximum offset into the code that this entry can be placed. While
11220 pushing fixes for forward references, all entries are sorted in order
11221 of increasing max_address. */
11222 HOST_WIDE_INT max_address;
11223 /* Similarly for an entry inserted for a backwards ref. */
11224 HOST_WIDE_INT min_address;
11225 /* The number of fixes referencing this entry. This can become zero
11226 if we "unpush" an entry. In this case we ignore the entry when we
11227 come to emit the code. */
11229 /* The offset from the start of the minipool. */
11230 HOST_WIDE_INT offset;
/* The value in the table.  */
/* The mode of the value.  */
11234 enum machine_mode mode;
/* The size of the value.  With iWMMXt enabled,
sizes > 4 also imply an alignment of 8 bytes.  */
11240 struct minipool_fixup
11244 HOST_WIDE_INT address;
11246 enum machine_mode mode;
11250 HOST_WIDE_INT forwards;
11251 HOST_WIDE_INT backwards;
11254 /* Fixes less than a word need padding out to a word boundary. */
11255 #define MINIPOOL_FIX_SIZE(mode) \
11256 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
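/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */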
11258 static Mnode * minipool_vector_head;
11259 static Mnode * minipool_vector_tail;
11260 static rtx minipool_vector_label;
11261 static int minipool_pad;
11263 /* The linked list of all minipool fixes required for this function. */
11264 Mfix * minipool_fix_head;
11265 Mfix * minipool_fix_tail;
11266 /* The fix entry for the current minipool, once it has been placed. */
11267 Mfix * minipool_barrier;
11269 /* Determines if INSN is the start of a jump table. Returns the end
11270 of the TABLE or NULL_RTX. */
11272 is_jump_table (rtx insn)
11276 if (GET_CODE (insn) == JUMP_INSN
11277 && JUMP_LABEL (insn) != NULL
11278 && ((table = next_real_insn (JUMP_LABEL (insn)))
11279 == next_real_insn (insn))
11281 && GET_CODE (table) == JUMP_INSN
11282 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11283 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11289 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11290 #define JUMP_TABLES_IN_TEXT_SECTION 0
11293 static HOST_WIDE_INT
11294 get_jump_table_size (rtx insn)
/* ADDR_VECs only take room if read-only data goes into the text section.  */
11298 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11300 rtx body = PATTERN (insn);
11301 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11302 HOST_WIDE_INT size;
11303 HOST_WIDE_INT modesize;
11305 modesize = GET_MODE_SIZE (GET_MODE (body));
11306 size = modesize * XVECLEN (body, elt);
11310 /* Round up size of TBB table to a halfword boundary. */
11311 size = (size + 1) & ~(HOST_WIDE_INT)1;
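/* E.g. a TBB table with five byte-sized entries is padded to six bytes.  */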
11314 /* No padding necessary for TBH. */
11317 /* Add two bytes for alignment on Thumb. */
11322 gcc_unreachable ();
11330 /* Move a minipool fix MP from its current location to before MAX_MP.
11331 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11332 constraints may need updating. */
11334 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11335 HOST_WIDE_INT max_address)
11337 /* The code below assumes these are different. */
11338 gcc_assert (mp != max_mp);
11340 if (max_mp == NULL)
11342 if (max_address < mp->max_address)
11343 mp->max_address = max_address;
11347 if (max_address > max_mp->max_address - mp->fix_size)
11348 mp->max_address = max_mp->max_address - mp->fix_size;
11350 mp->max_address = max_address;
11352 /* Unlink MP from its current position. Since max_mp is non-null,
11353 mp->prev must be non-null. */
11354 mp->prev->next = mp->next;
11355 if (mp->next != NULL)
11356 mp->next->prev = mp->prev;
11358 minipool_vector_tail = mp->prev;
11360 /* Re-insert it before MAX_MP. */
11362 mp->prev = max_mp->prev;
11365 if (mp->prev != NULL)
11366 mp->prev->next = mp;
11368 minipool_vector_head = mp;
11371 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as required.  */
11376 while (mp->prev != NULL
11377 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11379 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
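/* For instance, if MP must be emitted by address 1000 and its
predecessor is a 4-byte entry, the predecessor's bound is capped at
996 so that both entries still fit in front of the limit.  */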
11386 /* Add a constant to the minipool for a forward reference. Returns the
11387 node added or NULL if the constant will not fit in this pool. */
11389 add_minipool_forward_ref (Mfix *fix)
11391 /* If set, max_mp is the first pool_entry that has a lower
11392 constraint than the one we are trying to add. */
11393 Mnode * max_mp = NULL;
11394 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11397 /* If the minipool starts before the end of FIX->INSN then this FIX
cannot be placed into the current pool.  Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier.  */
11401 if (minipool_vector_head &&
11402 (fix->address + get_attr_length (fix->insn)
11403 >= minipool_vector_head->max_address - fix->fix_size))
11406 /* Scan the pool to see if a constant with the same value has
11407 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already exist.  */
11410 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11412 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11413 && fix->mode == mp->mode
11414 && (GET_CODE (fix->value) != CODE_LABEL
11415 || (CODE_LABEL_NUMBER (fix->value)
11416 == CODE_LABEL_NUMBER (mp->value)))
11417 && rtx_equal_p (fix->value, mp->value))
11419 /* More than one fix references this entry. */
11421 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11424 /* Note the insertion point if necessary. */
11426 && mp->max_address > max_address)
/* If we are inserting an 8-byte aligned quantity and
11430 we have not already found an insertion point, then
11431 make sure that all such 8-byte aligned quantities are
11432 placed at the start of the pool. */
11433 if (ARM_DOUBLEWORD_ALIGN
11435 && fix->fix_size >= 8
11436 && mp->fix_size < 8)
11439 max_address = mp->max_address;
11443 /* The value is not currently in the minipool, so we need to create
11444 a new entry for it. If MAX_MP is NULL, the entry will be put on
11445 the end of the list since the placement is less constrained than
11446 any existing entry. Otherwise, we insert the new fix before
MAX_MP and, if necessary, adjust the constraints on the other entries.  */
11450 mp->fix_size = fix->fix_size;
11451 mp->mode = fix->mode;
11452 mp->value = fix->value;
11454 /* Not yet required for a backwards ref. */
11455 mp->min_address = -65536;
11457 if (max_mp == NULL)
11459 mp->max_address = max_address;
11461 mp->prev = minipool_vector_tail;
11463 if (mp->prev == NULL)
11465 minipool_vector_head = mp;
11466 minipool_vector_label = gen_label_rtx ();
11469 mp->prev->next = mp;
11471 minipool_vector_tail = mp;
11475 if (max_address > max_mp->max_address - mp->fix_size)
11476 mp->max_address = max_mp->max_address - mp->fix_size;
11478 mp->max_address = max_address;
11481 mp->prev = max_mp->prev;
11483 if (mp->prev != NULL)
11484 mp->prev->next = mp;
11486 minipool_vector_head = mp;
11489 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as required.  */
11494 while (mp->prev != NULL
11495 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11497 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11505 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11506 HOST_WIDE_INT min_address)
11508 HOST_WIDE_INT offset;
11510 /* The code below assumes these are different. */
11511 gcc_assert (mp != min_mp);
11513 if (min_mp == NULL)
11515 if (min_address > mp->min_address)
11516 mp->min_address = min_address;
11520 /* We will adjust this below if it is too loose. */
11521 mp->min_address = min_address;
11523 /* Unlink MP from its current position. Since min_mp is non-null,
11524 mp->next must be non-null. */
11525 mp->next->prev = mp->prev;
11526 if (mp->prev != NULL)
11527 mp->prev->next = mp->next;
11529 minipool_vector_head = mp->next;
11531 /* Reinsert it after MIN_MP. */
11533 mp->next = min_mp->next;
11535 if (mp->next != NULL)
11536 mp->next->prev = mp;
11538 minipool_vector_tail = mp;
11544 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11546 mp->offset = offset;
11547 if (mp->refcount > 0)
11548 offset += mp->fix_size;
11550 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11551 mp->next->min_address = mp->min_address + mp->fix_size;
11557 /* Add a constant to the minipool for a backward reference. Returns the
11558 node added or NULL if the constant will not fit in this pool.
11560 Note that the code for insertion for a backwards reference can be
11561 somewhat confusing because the calculated offsets for each fix do
not take into account the size of the pool (which is still under construction).  */
11565 add_minipool_backward_ref (Mfix *fix)
11567 /* If set, min_mp is the last pool_entry that has a lower constraint
11568 than the one we are trying to add. */
11569 Mnode *min_mp = NULL;
11570 /* This can be negative, since it is only a constraint. */
11571 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11574 /* If we can't reach the current pool from this insn, or if we can't
11575 insert this entry at the end of the pool without pushing other
11576 fixes out of range, then we don't try. This ensures that we
11577 can't fail later on. */
11578 if (min_address >= minipool_barrier->address
11579 || (minipool_vector_tail->min_address + fix->fix_size
11580 >= minipool_barrier->address))
11583 /* Scan the pool to see if a constant with the same value has
11584 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already exist.  */
11587 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11589 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11590 && fix->mode == mp->mode
11591 && (GET_CODE (fix->value) != CODE_LABEL
11592 || (CODE_LABEL_NUMBER (fix->value)
11593 == CODE_LABEL_NUMBER (mp->value)))
11594 && rtx_equal_p (fix->value, mp->value)
11595 /* Check that there is enough slack to move this entry to the
11596 end of the table (this is conservative). */
11597 && (mp->max_address
11598 > (minipool_barrier->address
11599 + minipool_vector_tail->offset
11600 + minipool_vector_tail->fix_size)))
11603 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11606 if (min_mp != NULL)
11607 mp->min_address += fix->fix_size;
11610 /* Note the insertion point if necessary. */
11611 if (mp->min_address < min_address)
/* For now, nodes that require 8-byte alignment may only be
inserted at the start of the pool.  */
11615 if (ARM_DOUBLEWORD_ALIGN
11616 && fix->fix_size >= 8 && mp->fix_size < 8)
11621 else if (mp->max_address
11622 < minipool_barrier->address + mp->offset + fix->fix_size)
11624 /* Inserting before this entry would push the fix beyond
11625 its maximum address (which can happen if we have
re-located a forwards fix); force the new fix to come after it.  */
11628 if (ARM_DOUBLEWORD_ALIGN
11629 && fix->fix_size >= 8 && mp->fix_size < 8)
11634 min_address = mp->min_address + fix->fix_size;
11637 /* Do not insert a non-8-byte aligned quantity before 8-byte
11638 aligned quantities. */
11639 else if (ARM_DOUBLEWORD_ALIGN
11640 && fix->fix_size < 8
11641 && mp->fix_size >= 8)
11644 min_address = mp->min_address + fix->fix_size;
11649 /* We need to create a new entry. */
11651 mp->fix_size = fix->fix_size;
11652 mp->mode = fix->mode;
11653 mp->value = fix->value;
11655 mp->max_address = minipool_barrier->address + 65536;
11657 mp->min_address = min_address;
11659 if (min_mp == NULL)
11662 mp->next = minipool_vector_head;
11664 if (mp->next == NULL)
11666 minipool_vector_tail = mp;
11667 minipool_vector_label = gen_label_rtx ();
11670 mp->next->prev = mp;
11672 minipool_vector_head = mp;
11676 mp->next = min_mp->next;
11680 if (mp->next != NULL)
11681 mp->next->prev = mp;
11683 minipool_vector_tail = mp;
11686 /* Save the new entry. */
11694 /* Scan over the following entries and adjust their offsets. */
11695 while (mp->next != NULL)
11697 if (mp->next->min_address < mp->min_address + mp->fix_size)
11698 mp->next->min_address = mp->min_address + mp->fix_size;
11701 mp->next->offset = mp->offset + mp->fix_size;
11703 mp->next->offset = mp->offset;
11712 assign_minipool_offsets (Mfix *barrier)
11714 HOST_WIDE_INT offset = 0;
11717 minipool_barrier = barrier;
11719 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11721 mp->offset = offset;
11723 if (mp->refcount > 0)
11724 offset += mp->fix_size;
/* Output the literal table.  */
11730 dump_minipool (rtx scan)
11736 if (ARM_DOUBLEWORD_ALIGN)
11737 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11738 if (mp->refcount > 0 && mp->fix_size >= 8)
11745 fprintf (dump_file,
11746 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11747 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11749 scan = emit_label_after (gen_label_rtx (), scan);
11750 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11751 scan = emit_label_after (minipool_vector_label, scan);
11753 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11755 if (mp->refcount > 0)
11759 fprintf (dump_file,
11760 ";; Offset %u, min %ld, max %ld ",
11761 (unsigned) mp->offset, (unsigned long) mp->min_address,
11762 (unsigned long) mp->max_address);
11763 arm_print_value (dump_file, mp->value);
11764 fputc ('\n', dump_file);
11767 switch (mp->fix_size)
11769 #ifdef HAVE_consttable_1
11771 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11775 #ifdef HAVE_consttable_2
11777 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11781 #ifdef HAVE_consttable_4
11783 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11787 #ifdef HAVE_consttable_8
11789 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11793 #ifdef HAVE_consttable_16
11795 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11800 gcc_unreachable ();
11808 minipool_vector_head = minipool_vector_tail = NULL;
11809 scan = emit_insn_after (gen_consttable_end (), scan);
11810 scan = emit_barrier_after (scan);
11813 /* Return the cost of forcibly inserting a barrier after INSN. */
11815 arm_barrier_cost (rtx insn)
/* Basing the location of the pool on the loop depth would be
preferable, but at the moment the basic block information seems to be
corrupted by this stage of the compilation.  */
11820 int base_cost = 50;
11821 rtx next = next_nonnote_insn (insn);
11823 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11826 switch (GET_CODE (insn))
/* It will always be better to place the table before the label, rather than after it.  */
11838 return base_cost - 10;
11841 return base_cost + 10;
11845 /* Find the best place in the insn stream in the range
11846 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
Create the barrier by inserting a jump and add a new fix entry for it.  */
11850 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11852 HOST_WIDE_INT count = 0;
11854 rtx from = fix->insn;
11855 /* The instruction after which we will insert the jump. */
11856 rtx selected = NULL;
11858 /* The address at which the jump instruction will be placed. */
11859 HOST_WIDE_INT selected_address;
11861 HOST_WIDE_INT max_count = max_address - fix->address;
11862 rtx label = gen_label_rtx ();
11864 selected_cost = arm_barrier_cost (from);
11865 selected_address = fix->address;
11867 while (from && count < max_count)
/* This code shouldn't have been called if there was a natural barrier within range.  */
11874 gcc_assert (GET_CODE (from) != BARRIER);
11876 /* Count the length of this insn. */
11877 count += get_attr_length (from);
11879 /* If there is a jump table, add its length. */
11880 tmp = is_jump_table (from);
11883 count += get_jump_table_size (tmp);
11885 /* Jump tables aren't in a basic block, so base the cost on
11886 the dispatch insn. If we select this location, we will
11887 still put the pool after the table. */
11888 new_cost = arm_barrier_cost (from);
11890 if (count < max_count
11891 && (!selected || new_cost <= selected_cost))
11894 selected_cost = new_cost;
11895 selected_address = fix->address + count;
11898 /* Continue after the dispatch table. */
11899 from = NEXT_INSN (tmp);
11903 new_cost = arm_barrier_cost (from);
11905 if (count < max_count
11906 && (!selected || new_cost <= selected_cost))
11909 selected_cost = new_cost;
11910 selected_address = fix->address + count;
11913 from = NEXT_INSN (from);
11916 /* Make sure that we found a place to insert the jump. */
11917 gcc_assert (selected);
11919 /* Create a new JUMP_INSN that branches around a barrier. */
11920 from = emit_jump_insn_after (gen_jump (label), selected);
11921 JUMP_LABEL (from) = label;
11922 barrier = emit_barrier_after (from);
11923 emit_label_after (label, barrier);
11925 /* Create a minipool barrier entry for the new barrier. */
11926 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11927 new_fix->insn = barrier;
11928 new_fix->address = selected_address;
11929 new_fix->next = fix->next;
11930 fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at ADDRESS.  */
11938 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11940 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11943 fix->address = address;
11946 if (minipool_fix_head != NULL)
11947 minipool_fix_tail->next = fix;
11949 minipool_fix_head = fix;
11951 minipool_fix_tail = fix;
11954 /* Record INSN, which will need fixing up to load a value from the
minipool.  ADDRESS is the offset of the insn from the start of the
function; LOC is a pointer to the part of the insn which requires
fixing; VALUE is the constant that must be loaded, which is of type
MODE.  */
11960 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11961 enum machine_mode mode, rtx value)
11963 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11966 fix->address = address;
11969 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11970 fix->value = value;
11971 fix->forwards = get_attr_pool_range (insn);
11972 fix->backwards = get_attr_neg_pool_range (insn);
11973 fix->minipool = NULL;
11975 /* If an insn doesn't have a range defined for it, then it isn't
11976 expecting to be reworked by this code. Better to stop now than
11977 to generate duff assembly code. */
11978 gcc_assert (fix->forwards || fix->backwards);
11980 /* If an entry requires 8-byte alignment then assume all constant pools
11981 require 4 bytes of padding. Trying to do this later on a per-pool
11982 basis is awkward because existing pool entries have to be modified. */
11983 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11988 fprintf (dump_file,
11989 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11990 GET_MODE_NAME (mode),
11991 INSN_UID (insn), (unsigned long) address,
11992 -1 * (long)fix->backwards, (long)fix->forwards);
11993 arm_print_value (dump_file, fix->value);
11994 fprintf (dump_file, "\n");
11997 /* Add it to the chain of fixes. */
12000 if (minipool_fix_head != NULL)
12001 minipool_fix_tail->next = fix;
12003 minipool_fix_head = fix;
12005 minipool_fix_tail = fix;
12008 /* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to generate it.  */
12012 arm_const_double_inline_cost (rtx val)
12014 rtx lowpart, highpart;
12015 enum machine_mode mode;
12017 mode = GET_MODE (val);
12019 if (mode == VOIDmode)
12022 gcc_assert (GET_MODE_SIZE (mode) == 8);
12024 lowpart = gen_lowpart (SImode, val);
12025 highpart = gen_highpart_mode (SImode, mode, val);
12027 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12028 gcc_assert (GET_CODE (highpart) == CONST_INT);
12030 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12031 NULL_RTX, NULL_RTX, 0, 0)
12032 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12033 NULL_RTX, NULL_RTX, 0, 0));
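/* For example, the cost of 0x0000000100000001 is 2: each SImode half is
the constant 1, which arm_gen_constant can load with a single MOV.  */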
12036 /* Return true if it is worthwhile to split a 64-bit constant into two
12037 32-bit operations. This is the case if optimizing for size, or
12038 if we have load delay slots, or if one 32-bit part can be done with
12039 a single data operation. */
12041 arm_const_double_by_parts (rtx val)
12043 enum machine_mode mode = GET_MODE (val);
12046 if (optimize_size || arm_ld_sched)
12049 if (mode == VOIDmode)
12052 part = gen_highpart_mode (SImode, mode, val);
12054 gcc_assert (GET_CODE (part) == CONST_INT);
12056 if (const_ok_for_arm (INTVAL (part))
12057 || const_ok_for_arm (~INTVAL (part)))
12060 part = gen_lowpart (SImode, val);
12062 gcc_assert (GET_CODE (part) == CONST_INT);
12064 if (const_ok_for_arm (INTVAL (part))
12065 || const_ok_for_arm (~INTVAL (part)))
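/* E.g. 0x12345678000000ff is worth splitting even without load delay
slots, because the low word 0xff is a single valid ARM immediate.  */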
12071 /* Return true if it is possible to inline both the high and low parts
12072 of a 64-bit constant into 32-bit data processing instructions. */
12074 arm_const_double_by_immediates (rtx val)
12076 enum machine_mode mode = GET_MODE (val);
12079 if (mode == VOIDmode)
12082 part = gen_highpart_mode (SImode, mode, val);
12084 gcc_assert (GET_CODE (part) == CONST_INT);
12086 if (!const_ok_for_arm (INTVAL (part)))
12089 part = gen_lowpart (SImode, val);
12091 gcc_assert (GET_CODE (part) == CONST_INT);
12093 if (!const_ok_for_arm (INTVAL (part)))
12099 /* Scan INSN and note any of its operands that need fixing.
12100 If DO_PUSHES is false we do not actually push any of the fixups
12101 needed. The function returns TRUE if any fixups were needed/pushed.
12102 This is used by arm_memory_load_p() which needs to know about loads
12103 of constants that will be converted into minipool loads. */
12105 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12107 bool result = false;
12110 extract_insn (insn);
12112 if (!constrain_operands (1))
12113 fatal_insn_not_found (insn);
12115 if (recog_data.n_alternatives == 0)
/* Fill in recog_op_alt with information about the constraints of this insn.  */
12120 preprocess_constraints ();
12122 for (opno = 0; opno < recog_data.n_operands; opno++)
12124 /* Things we need to fix can only occur in inputs. */
12125 if (recog_data.operand_type[opno] != OP_IN)
12128 /* If this alternative is a memory reference, then any mention
12129 of constants in this alternative is really to fool reload
12130 into allowing us to accept one there. We need to fix them up
12131 now so that we output the right code. */
12132 if (recog_op_alt[opno][which_alternative].memory_ok)
12134 rtx op = recog_data.operand[opno];
12136 if (CONSTANT_P (op))
12139 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12140 recog_data.operand_mode[opno], op);
12143 else if (GET_CODE (op) == MEM
12144 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12145 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12149 rtx cop = avoid_constant_pool_reference (op);
12151 /* Casting the address of something to a mode narrower
12152 than a word can cause avoid_constant_pool_reference()
12153 to return the pool reference itself. That's no good to
us here.  Let's just hope that we can use the
12155 constant pool value directly. */
12157 cop = get_pool_constant (XEXP (op, 0));
12159 push_minipool_fix (insn, address,
12160 recog_data.operand_loc[opno],
12161 recog_data.operand_mode[opno], cop);
12172 /* Convert instructions to their cc-clobbering variant if possible, since
12173 that allows us to use smaller encodings. */
12176 thumb2_reorg (void)
12181 INIT_REG_SET (&live);
12183 /* We are freeing block_for_insn in the toplev to keep compatibility
12184 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12185 compute_bb_for_insn ();
12192 COPY_REG_SET (&live, DF_LR_OUT (bb));
12193 df_simulate_initialize_backwards (bb, &live);
12194 FOR_BB_INSNS_REVERSE (bb, insn)
12196 if (NONJUMP_INSN_P (insn)
12197 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12199 rtx pat = PATTERN (insn);
12200 if (GET_CODE (pat) == SET
12201 && low_register_operand (XEXP (pat, 0), SImode)
12202 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12203 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12204 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12206 rtx dst = XEXP (pat, 0);
12207 rtx src = XEXP (pat, 1);
12208 rtx op0 = XEXP (src, 0);
12209 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12210 ? XEXP (src, 1) : NULL);
12212 if (rtx_equal_p (dst, op0)
12213 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12215 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12216 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12217 rtvec vec = gen_rtvec (2, pat, clobber);
12219 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12220 INSN_CODE (insn) = -1;
12222 /* We can also handle a commutative operation where the
12223 second operand matches the destination. */
12224 else if (op1 && rtx_equal_p (dst, op1))
12226 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12227 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12230 src = copy_rtx (src);
12231 XEXP (src, 0) = op1;
12232 XEXP (src, 1) = op0;
12233 pat = gen_rtx_SET (VOIDmode, dst, src);
12234 vec = gen_rtvec (2, pat, clobber);
12235 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12236 INSN_CODE (insn) = -1;
12241 if (NONDEBUG_INSN_P (insn))
12242 df_simulate_one_insn_backwards (bb, insn, &live);
12246 CLEAR_REG_SET (&live);
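/* For example, where the flags are shown to be dead this pass allows

	add	r0, r0, r1	@ 32-bit Thumb-2 encoding, flags untouched

to be assembled as

	adds	r0, r0, r1	@ 16-bit encoding, flags clobbered

which is exactly what the added CC_REGNUM clobber licenses.  */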
/* GCC puts the pool in the wrong place for ARM, since we can only
12250 load addresses a limited distance around the pc. We do some
12251 special munging to move the constant pool values to the correct
12252 point in the code. */
12257 HOST_WIDE_INT address = 0;
12263 minipool_fix_head = minipool_fix_tail = NULL;
12265 /* The first insn must always be a note, or the code below won't
12266 scan it properly. */
12267 insn = get_insns ();
12268 gcc_assert (GET_CODE (insn) == NOTE);
12271 /* Scan all the insns and record the operands that will need fixing. */
12272 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12274 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12275 && (arm_cirrus_insn_p (insn)
12276 || GET_CODE (insn) == JUMP_INSN
12277 || arm_memory_load_p (insn)))
12278 cirrus_reorg (insn);
12280 if (GET_CODE (insn) == BARRIER)
12281 push_minipool_barrier (insn, address);
12282 else if (INSN_P (insn))
12286 note_invalid_constants (insn, address, true);
12287 address += get_attr_length (insn);
12289 /* If the insn is a vector jump, add the size of the table
12290 and skip the table. */
12291 if ((table = is_jump_table (insn)) != NULL)
12293 address += get_jump_table_size (table);
12299 fix = minipool_fix_head;
12301 /* Now scan the fixups and perform the required changes. */
12306 Mfix * last_added_fix;
12307 Mfix * last_barrier = NULL;
12310 /* Skip any further barriers before the next fix. */
12311 while (fix && GET_CODE (fix->insn) == BARRIER)
12314 /* No more fixes. */
12318 last_added_fix = NULL;
12320 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12322 if (GET_CODE (ftmp->insn) == BARRIER)
12324 if (ftmp->address >= minipool_vector_head->max_address)
12327 last_barrier = ftmp;
12329 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12332 last_added_fix = ftmp; /* Keep track of the last fix added. */
12335 /* If we found a barrier, drop back to that; any fixes that we
12336 could have reached but come after the barrier will now go in
12337 the next mini-pool. */
12338 if (last_barrier != NULL)
/* Reduce the refcount for those fixes that won't go into this pool any more.  */
12342 for (fdel = last_barrier->next;
12343 fdel && fdel != ftmp;
12346 fdel->minipool->refcount--;
12347 fdel->minipool = NULL;
12350 ftmp = last_barrier;
/* ftmp is the first fix that we can't fit into this pool and
there are no natural barriers that we could use.  Insert a
12356 new barrier in the code somewhere between the previous
12357 fix and this one, and arrange to jump around it. */
12358 HOST_WIDE_INT max_address;
12360 /* The last item on the list of fixes must be a barrier, so
12361 we can never run off the end of the list of fixes without
12362 last_barrier being set. */
12365 max_address = minipool_vector_head->max_address;
12366 /* Check that there isn't another fix that is in range that
12367 we couldn't fit into this pool because the pool was
12368 already too large: we need to put the pool before such an
12369 instruction. The pool itself may come just after the
12370 fix because create_fix_barrier also allows space for a
12371 jump instruction. */
12372 if (ftmp->address < max_address)
12373 max_address = ftmp->address + 1;
12375 last_barrier = create_fix_barrier (last_added_fix, max_address);
12378 assign_minipool_offsets (last_barrier);
12382 if (GET_CODE (ftmp->insn) != BARRIER
12383 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12390 /* Scan over the fixes we have identified for this pool, fixing them
12391 up and adding the constants to the pool itself. */
12392 for (this_fix = fix; this_fix && ftmp != this_fix;
12393 this_fix = this_fix->next)
12394 if (GET_CODE (this_fix->insn) != BARRIER)
12397 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12398 minipool_vector_label),
12399 this_fix->minipool->offset);
12400 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12403 dump_minipool (last_barrier->insn);
12407 /* From now on we must synthesize any constants that we can't handle
12408 directly. This can happen if the RTL gets split during final
12409 instruction generation. */
12410 after_arm_reorg = 1;
12412 /* Free the minipool memory. */
12413 obstack_free (&minipool_obstack, minipool_startobj);
12416 /* Routines to output assembly language. */
12418 /* If the rtx is the correct value then return the string of the number.
12419 In this way we can ensure that valid double constants are generated even
12420 when cross compiling. */
12422 fp_immediate_constant (rtx x)
12427 if (!fp_consts_inited)
12430 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12431 for (i = 0; i < 8; i++)
12432 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12433 return strings_fp[i];
12435 gcc_unreachable ();
12438 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12439 static const char *
12440 fp_const_from_val (REAL_VALUE_TYPE *r)
12444 if (!fp_consts_inited)
12447 for (i = 0; i < 8; i++)
12448 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12449 return strings_fp[i];
12451 gcc_unreachable ();
12454 /* Output the operands of a LDM/STM instruction to STREAM.
12455 MASK is the ARM register set mask of which only bits 0-15 are important.
12456 REG is the base register, either the frame pointer or the stack pointer,
12457 INSTR is the possibly suffixed load or store instruction.
12458 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12461 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12462 unsigned long mask, int rfe)
12465 bool not_first = FALSE;
12467 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12468 fputc ('\t', stream);
12469 asm_fprintf (stream, instr, reg);
12470 fputc ('{', stream);
12472 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12473 if (mask & (1 << i))
12476 fprintf (stream, ", ");
12478 asm_fprintf (stream, "%r", i);
12483 fprintf (stream, "}^\n");
12485 fprintf (stream, "}\n");
12489 /* Output a FLDMD instruction to STREAM.
BASE is the register containing the address.
12491 REG and COUNT specify the register range.
12492 Extra registers may be added to avoid hardware bugs.
12494 We output FLDMD even for ARMv5 VFP implementations. Although
12495 FLDMD is technically not supported until ARMv6, it is believed
12496 that all VFP implementations support its use in this context. */
12499 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12503 /* Workaround ARM10 VFPr1 bug. */
12504 if (count == 2 && !arm_arch6)
12511 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12512 load into multiple parts if we have to handle more than 16 registers. */
12515 vfp_output_fldmd (stream, base, reg, 16);
12516 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12520 fputc ('\t', stream);
12521 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12523 for (i = reg; i < reg + count; i++)
12526 fputs (", ", stream);
12527 asm_fprintf (stream, "d%d", i);
12529 fputs ("}\n", stream);
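/* For example, BASE = SP_REGNUM, REG = 8, COUNT = 3 prints

	fldmfdd	sp!, {d8, d9, d10}

(on pre-v6 cores the COUNT == 2 workaround above widens such a pair to
three registers).  */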
12534 /* Output the assembly for a store multiple. */
12537 vfp_output_fstmd (rtx * operands)
12544 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12545 p = strlen (pattern);
12547 gcc_assert (GET_CODE (operands[1]) == REG);
12549 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12550 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12552 p += sprintf (&pattern[p], ", d%d", base + i);
12554 strcpy (&pattern[p], "}");
12556 output_asm_insn (pattern, operands);
12561 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12562 number of bytes pushed. */
12565 vfp_emit_fstmd (int base_reg, int count)
12572 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12573 register pairs are stored by a store multiple insn. We avoid this
12574 by pushing an extra pair. */
12575 if (count == 2 && !arm_arch6)
12577 if (base_reg == LAST_VFP_REGNUM - 3)
12582 /* FSTMD may not store more than 16 doubleword registers at once. Split
larger stores into multiple parts (up to a maximum of two, in practice).  */
/* NOTE: base_reg is an internal register number, so each D register counts as 2.  */
12590 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12591 saved += vfp_emit_fstmd (base_reg, 16);
12595 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12596 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12598 reg = gen_rtx_REG (DFmode, base_reg);
12601 XVECEXP (par, 0, 0)
12602 = gen_rtx_SET (VOIDmode,
12605 gen_rtx_PRE_MODIFY (Pmode,
12608 (stack_pointer_rtx,
12611 gen_rtx_UNSPEC (BLKmode,
12612 gen_rtvec (1, reg),
12613 UNSPEC_PUSH_MULT));
12615 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12616 plus_constant (stack_pointer_rtx, -(count * 8)));
12617 RTX_FRAME_RELATED_P (tmp) = 1;
12618 XVECEXP (dwarf, 0, 0) = tmp;
12620 tmp = gen_rtx_SET (VOIDmode,
12621 gen_frame_mem (DFmode, stack_pointer_rtx),
12623 RTX_FRAME_RELATED_P (tmp) = 1;
12624 XVECEXP (dwarf, 0, 1) = tmp;
12626 for (i = 1; i < count; i++)
12628 reg = gen_rtx_REG (DFmode, base_reg);
12630 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12632 tmp = gen_rtx_SET (VOIDmode,
12633 gen_frame_mem (DFmode,
12634 plus_constant (stack_pointer_rtx,
12637 RTX_FRAME_RELATED_P (tmp) = 1;
12638 XVECEXP (dwarf, 0, i + 1) = tmp;
12641 par = emit_insn (par);
12642 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12643 RTX_FRAME_RELATED_P (par) = 1;
12648 /* Emit a call instruction with pattern PAT. ADDR is the address of
12649 the call target. */
12652 arm_emit_call_insn (rtx pat, rtx addr)
12656 insn = emit_call_insn (pat);
12658 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12659 If the call might use such an entry, add a use of the PIC register
12660 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12661 if (TARGET_VXWORKS_RTP
12663 && GET_CODE (addr) == SYMBOL_REF
12664 && (SYMBOL_REF_DECL (addr)
12665 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12666 : !SYMBOL_REF_LOCAL_P (addr)))
12668 require_pic_register ();
12669 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12673 /* Output a 'call' insn. */
12675 output_call (rtx *operands)
12677 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12679 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12680 if (REGNO (operands[0]) == LR_REGNUM)
12682 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12683 output_asm_insn ("mov%?\t%0, %|lr", operands);
12686 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12688 if (TARGET_INTERWORK || arm_arch4t)
12689 output_asm_insn ("bx%?\t%0", operands);
12691 output_asm_insn ("mov%?\t%|pc, %0", operands);
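/* So a call through, say, r2 with interworking (or on v4t) emits

	mov	lr, pc
	bx	r2

falling back to "mov pc, r2" on older non-interworking cores.  */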
12696 /* Output a 'call' insn that is a reference in memory. This is
12697 disabled for ARMv5 and we prefer a blx instead because otherwise
12698 there's a significant performance overhead. */
12700 output_call_mem (rtx *operands)
12702 gcc_assert (!arm_arch5);
12703 if (TARGET_INTERWORK)
12705 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12706 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12707 output_asm_insn ("bx%?\t%|ip", operands);
12709 else if (regno_use_in (LR_REGNUM, operands[0]))
12711 /* LR is used in the memory address. We load the address in the
12712 first instruction. It's safe to use IP as the target of the
12713 load since the call will kill it anyway. */
12714 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12715 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12717 output_asm_insn ("bx%?\t%|ip", operands);
12719 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12723 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12724 output_asm_insn ("ldr%?\t%|pc, %0", operands);
/* Output a move from arm registers to an fpa register.
OPERANDS[0] is an fpa register.
OPERANDS[1] is the first register of an arm register pair.  */
12735 output_mov_long_double_fpa_from_arm (rtx *operands)
12737 int arm_reg0 = REGNO (operands[1]);
12740 gcc_assert (arm_reg0 != IP_REGNUM);
12742 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12743 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12744 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12746 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12747 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
/* Output a move from an fpa register to arm registers.
OPERANDS[0] is the first register of an arm register pair.
OPERANDS[1] is an fpa register.  */
12756 output_mov_long_double_arm_from_fpa (rtx *operands)
12758 int arm_reg0 = REGNO (operands[0]);
12761 gcc_assert (arm_reg0 != IP_REGNUM);
12763 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12764 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12765 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12767 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12768 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
/* Output a move from arm registers to arm registers of a long double.
12773 OPERANDS[0] is the destination.
12774 OPERANDS[1] is the source. */
12776 output_mov_long_double_arm_from_arm (rtx *operands)
12778 /* We have to be careful here because the two might overlap. */
12779 int dest_start = REGNO (operands[0]);
12780 int src_start = REGNO (operands[1]);
12784 if (dest_start < src_start)
12786 for (i = 0; i < 3; i++)
12788 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12789 ops[1] = gen_rtx_REG (SImode, src_start + i);
12790 output_asm_insn ("mov%?\t%0, %1", ops);
12795 for (i = 2; i >= 0; i--)
12797 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12798 ops[1] = gen_rtx_REG (SImode, src_start + i);
12799 output_asm_insn ("mov%?\t%0, %1", ops);
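/* For example, moving {r1, r2, r3} into {r2, r3, r4} must copy r3 to r4
first; copying forwards would clobber r2 and r3 before reading them.  */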
12807 arm_emit_movpair (rtx dest, rtx src)
12809 /* If the src is an immediate, simplify it. */
12810 if (CONST_INT_P (src))
12812 HOST_WIDE_INT val = INTVAL (src);
12813 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12814 if ((val >> 16) & 0x0000ffff)
12815 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12817 GEN_INT ((val >> 16) & 0x0000ffff));
12820 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12821 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
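/* On a target with movw/movt a constant such as 0x12345678 therefore
becomes

	movw	rd, #0x5678
	movt	rd, #0x1234

and the movt is omitted entirely when the high half is zero.  */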
/* Output a move from arm registers to an fpa register.
OPERANDS[0] is an fpa register.
OPERANDS[1] is the first register of an arm register pair.  */
12828 output_mov_double_fpa_from_arm (rtx *operands)
12830 int arm_reg0 = REGNO (operands[1]);
12833 gcc_assert (arm_reg0 != IP_REGNUM);
12835 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12836 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12837 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12838 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
/* Output a move from an fpa register to arm registers.
OPERANDS[0] is the first register of an arm register pair.
OPERANDS[1] is an fpa register.  */
12846 output_mov_double_arm_from_fpa (rtx *operands)
12848 int arm_reg0 = REGNO (operands[0]);
12851 gcc_assert (arm_reg0 != IP_REGNUM);
12853 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12854 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12855 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12856 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12860 /* Output a move between double words.
12861 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12862 or MEM<-REG and all MEMs must be offsettable addresses. */
12864 output_move_double (rtx *operands)
12866 enum rtx_code code0 = GET_CODE (operands[0]);
12867 enum rtx_code code1 = GET_CODE (operands[1]);
12872 unsigned int reg0 = REGNO (operands[0]);
12874 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12876 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12878 switch (GET_CODE (XEXP (operands[1], 0)))
&& !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
12883 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12885 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12889 gcc_assert (TARGET_LDRD);
12890 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12895 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12897 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12902 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12904 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12908 gcc_assert (TARGET_LDRD);
12909 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
/* Autoincrement addressing modes should never have overlapping
base and destination registers, and overlapping index registers
are already prohibited, so this doesn't need to worry about
fix_cm3_ldrd.  */
12918 otherops[0] = operands[0];
12919 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12920 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12922 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12924 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12926 /* Registers overlap so split out the increment. */
12927 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12928 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12932 /* Use a single insn if we can.
12933 FIXME: IWMMXT allows offsets larger than ldrd can
12934 handle, fix these up with a pair of ldr. */
12936 || GET_CODE (otherops[2]) != CONST_INT
12937 || (INTVAL (otherops[2]) > -256
12938 && INTVAL (otherops[2]) < 256))
12939 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12942 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12943 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12949 /* Use a single insn if we can.
12950 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12951 fix these up with a pair of ldr. */
12953 || GET_CODE (otherops[2]) != CONST_INT
12954 || (INTVAL (otherops[2]) > -256
12955 && INTVAL (otherops[2]) < 256))
12956 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12959 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12960 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12967 /* We might be able to use ldrd %0, %1 here. However the range is
12968 different to ldr/adr, and it is broken on some ARMv7-M
12969 implementations. */
/* Use the second register of the pair to avoid problematic conditions.  */
12972 otherops[1] = operands[1];
12973 output_asm_insn ("adr%?\t%0, %1", otherops);
12974 operands[1] = otherops[0];
12976 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12978 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12981 /* ??? This needs checking for thumb2. */
12983 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12984 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12986 otherops[0] = operands[0];
12987 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12988 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12990 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12992 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12994 switch ((int) INTVAL (otherops[2]))
12997 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13002 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13007 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
13012 operands[1] = otherops[0];
13014 && (GET_CODE (otherops[2]) == REG
13016 || (GET_CODE (otherops[2]) == CONST_INT
13017 && INTVAL (otherops[2]) > -256
13018 && INTVAL (otherops[2]) < 256)))
13020 if (reg_overlap_mentioned_p (operands[0],
13024 /* Swap base and index registers over to
13025 avoid a conflict. */
13027 otherops[1] = otherops[2];
13030 /* If both registers conflict, it will usually
13031 have been fixed by a splitter. */
13032 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13033 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13035 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13036 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13040 otherops[0] = operands[0];
13041 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13046 if (GET_CODE (otherops[2]) == CONST_INT)
13048 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13049 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13051 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13054 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13057 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13060 return "ldr%(d%)\t%0, [%1]";
13062 return "ldm%(ia%)\t%1, %M0";
13066 otherops[1] = adjust_address (operands[1], SImode, 4);
13067 /* Take care of overlapping base/data reg. */
13068 if (reg_mentioned_p (operands[0], operands[1]))
13070 output_asm_insn ("ldr%?\t%0, %1", otherops);
13071 output_asm_insn ("ldr%?\t%0, %1", operands);
13075 output_asm_insn ("ldr%?\t%0, %1", operands);
13076 output_asm_insn ("ldr%?\t%0, %1", otherops);
13083 /* Constraints should ensure this. */
13084 gcc_assert (code0 == MEM && code1 == REG);
13085 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13087 switch (GET_CODE (XEXP (operands[0], 0)))
13091 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13093 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13097 gcc_assert (TARGET_LDRD);
13098 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13103 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13105 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13110 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13112 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13116 gcc_assert (TARGET_LDRD);
13117 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13122 otherops[0] = operands[1];
13123 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13124 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13126 /* IWMMXT allows offsets larger than ldrd can handle,
13127 fix these up with a pair of ldr. */
13129 && GET_CODE (otherops[2]) == CONST_INT
&& (INTVAL (otherops[2]) <= -256
|| INTVAL (otherops[2]) >= 256))
13133 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13135 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13136 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13140 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13141 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13144 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13145 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13147 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13151 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13152 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13154 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13157 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13163 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13169 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13174 && (GET_CODE (otherops[2]) == REG
13176 || (GET_CODE (otherops[2]) == CONST_INT
13177 && INTVAL (otherops[2]) > -256
13178 && INTVAL (otherops[2]) < 256)))
13180 otherops[0] = operands[1];
13181 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13182 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13188 otherops[0] = adjust_address (operands[0], SImode, 4);
13189 otherops[1] = operands[1];
13190 output_asm_insn ("str%?\t%1, %0", operands);
13191 output_asm_insn ("str%?\t%H1, %0", otherops);
13198 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13199 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13202 output_move_quad (rtx *operands)
13204 if (REG_P (operands[0]))
13206 /* Load, or reg->reg move. */
13208 if (MEM_P (operands[1]))
13210 switch (GET_CODE (XEXP (operands[1], 0)))
13213 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13218 output_asm_insn ("adr%?\t%0, %1", operands);
13219 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13223 gcc_unreachable ();
13231 gcc_assert (REG_P (operands[1]));
13233 dest = REGNO (operands[0]);
13234 src = REGNO (operands[1]);
/* This seems pretty dumb, but hopefully GCC won't try to do it too often.  */
13239 for (i = 0; i < 4; i++)
13241 ops[0] = gen_rtx_REG (SImode, dest + i);
13242 ops[1] = gen_rtx_REG (SImode, src + i);
13243 output_asm_insn ("mov%?\t%0, %1", ops);
13246 for (i = 3; i >= 0; i--)
13248 ops[0] = gen_rtx_REG (SImode, dest + i);
13249 ops[1] = gen_rtx_REG (SImode, src + i);
13250 output_asm_insn ("mov%?\t%0, %1", ops);
13256 gcc_assert (MEM_P (operands[0]));
13257 gcc_assert (REG_P (operands[1]));
13258 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13260 switch (GET_CODE (XEXP (operands[0], 0)))
13263 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13267 gcc_unreachable ();
13274 /* Output a VFP load or store instruction. */
13277 output_move_vfp (rtx *operands)
13279 rtx reg, mem, addr, ops[2];
13280 int load = REG_P (operands[0]);
13281 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13282 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13285 enum machine_mode mode;
13287 reg = operands[!load];
13288 mem = operands[load];
13290 mode = GET_MODE (reg);
13292 gcc_assert (REG_P (reg));
13293 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13294 gcc_assert (mode == SFmode
13298 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13299 gcc_assert (MEM_P (mem));
13301 addr = XEXP (mem, 0);
13303 switch (GET_CODE (addr))
13306 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13307 ops[0] = XEXP (addr, 0);
13312 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13313 ops[0] = XEXP (addr, 0);
13318 templ = "f%s%c%%?\t%%%s0, %%1%s";
13324 sprintf (buff, templ,
13325 load ? "ld" : "st",
13328 integer_p ? "\t%@ int" : "");
13329 output_asm_insn (buff, ops);
13334 /* Output a Neon quad-word load or store, or a load or store for
13335 larger structure modes.
13337 WARNING: The ordering of elements is weird in big-endian mode,
13338 because we use VSTM, as required by the EABI. GCC RTL defines
element ordering based on in-memory order.  This can differ
13340 from the architectural ordering of elements within a NEON register.
13341 The intrinsics defined in arm_neon.h use the NEON register element
13342 ordering, not the GCC RTL element ordering.
For example, the in-memory ordering of a big-endian quadword
13345 vector with 16-bit elements when stored from register pair {d0,d1}
13346 will be (lowest address first, d0[N] is NEON register element N):
13348 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13350 When necessary, quadword registers (dN, dN+1) are moved to ARM
13351 registers from rN in the order:
13353 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13355 So that STM/LDM can be used on vectors in ARM registers, and the
13356 same memory layout will result as if VSTM/VLDM were used. */
13359 output_move_neon (rtx *operands)
13361 rtx reg, mem, addr, ops[2];
13362 int regno, load = REG_P (operands[0]);
13365 enum machine_mode mode;
13367 reg = operands[!load];
13368 mem = operands[load];
13370 mode = GET_MODE (reg);
13372 gcc_assert (REG_P (reg));
13373 regno = REGNO (reg);
13374 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13375 || NEON_REGNO_OK_FOR_QUAD (regno));
13376 gcc_assert (VALID_NEON_DREG_MODE (mode)
13377 || VALID_NEON_QREG_MODE (mode)
13378 || VALID_NEON_STRUCT_MODE (mode));
13379 gcc_assert (MEM_P (mem));
13381 addr = XEXP (mem, 0);
13383 /* Strip off const from addresses like (const (plus (...))). */
13384 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13385 addr = XEXP (addr, 0);
13387 switch (GET_CODE (addr))
13390 templ = "v%smia%%?\t%%0!, %%h1";
13391 ops[0] = XEXP (addr, 0);
13396 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13397 templ = "v%smdb%%?\t%%0!, %%h1";
13398 ops[0] = XEXP (addr, 0);
13403 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13404 gcc_unreachable ();
13409 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13412 for (i = 0; i < nregs; i++)
13414 /* We're only using DImode here because it's a convenient size. */
13415 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13416 ops[1] = adjust_address (mem, DImode, 8 * i);
13417 if (reg_overlap_mentioned_p (ops[0], mem))
13419 gcc_assert (overlap == -1);
13424 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13425 output_asm_insn (buff, ops);
13430 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13431 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13432 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13433 output_asm_insn (buff, ops);
13440 templ = "v%smia%%?\t%%m0, %%h1";
13445 sprintf (buff, templ, load ? "ld" : "st");
13446 output_asm_insn (buff, ops);
13451 /* Compute and return the length of neon_mov<mode>, where <mode> is
13452 one of VSTRUCT modes: EI, OI, CI or XI. */
13454 arm_attr_length_move_neon (rtx insn)
13456 rtx reg, mem, addr;
13458 enum machine_mode mode;
13460 extract_insn_cached (insn);
13462 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13464 mode = GET_MODE (recog_data.operand[0]);
13475 gcc_unreachable ();
13479 load = REG_P (recog_data.operand[0]);
13480 reg = recog_data.operand[!load];
13481 mem = recog_data.operand[load];
13483 gcc_assert (MEM_P (mem));
13485 mode = GET_MODE (reg);
13486 addr = XEXP (mem, 0);
13488 /* Strip off const from addresses like (const (plus (...))). */
13489 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13490 addr = XEXP (addr, 0);
13492 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13494 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13501 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
13502    return 0.  */
13505 arm_address_offset_is_imm (rtx insn)
13509 extract_insn_cached (insn);
13511 if (REG_P (recog_data.operand[0]))
13514 mem = recog_data.operand[0];
13516 gcc_assert (MEM_P (mem));
13518 addr = XEXP (mem, 0);
13520 if (GET_CODE (addr) == REG
13521 || (GET_CODE (addr) == PLUS
13522 && GET_CODE (XEXP (addr, 0)) == REG
13523 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13529 /* Output an ADD r, s, #n where n may be too big for one instruction.
13530 If adding zero to one register, output nothing. */
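/* Illustrative expansion (not verbatim compiler output): for operands
   {r0, r1, #0x10001} this emits

	add	r0, r1, #1
	add	r0, r0, #65536

   since 0x10001 is not a valid ARM immediate, while each 8-bit chunk
   at an even rotation is.  */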
13532 output_add_immediate (rtx *operands)
13534 HOST_WIDE_INT n = INTVAL (operands[2]);
13536 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13539 output_multi_immediate (operands,
13540 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13543 output_multi_immediate (operands,
13544 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13551 /* Output a multiple immediate operation.
13552 OPERANDS is the vector of operands referred to in the output patterns.
13553 INSTR1 is the output pattern to use for the first constant.
13554 INSTR2 is the output pattern to use for subsequent constants.
13555 IMMED_OP is the index of the constant slot in OPERANDS.
13556 N is the constant value. */
13557 static const char *
13558 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13559 int immed_op, HOST_WIDE_INT n)
13561 #if HOST_BITS_PER_WIDE_INT > 32
13567 /* Quick and easy output. */
13568 operands[immed_op] = const0_rtx;
13569 output_asm_insn (instr1, operands);
13574 const char * instr = instr1;
13576 /* Note that n is never zero here (which would give no output). */
13577 for (i = 0; i < 32; i += 2)
13581 operands[immed_op] = GEN_INT (n & (255 << i));
13582 output_asm_insn (instr, operands);
13592 /* Return the name of a shifter operation. */
13593 static const char *
13594 arm_shift_nmem (enum rtx_code code)
13599 return ARM_LSL_NAME;
13615 /* Return the appropriate ARM instruction for the operation code.
13616 The returned result should not be overwritten. OP is the rtx of the
13617 operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13618    was shifted.  */
13620 arithmetic_instr (rtx op, int shift_first_arg)
13622 switch (GET_CODE (op))
13628 return shift_first_arg ? "rsb" : "sub";
13643 return arm_shift_nmem (GET_CODE (op));
13646 gcc_unreachable ();
13650 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13651 for the operation code. The returned result should not be overwritten.
13652 OP is the rtx code of the shift.
13653 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13654    shift.  */
13655 static const char *
13656 shift_op (rtx op, HOST_WIDE_INT *amountp)
13659 enum rtx_code code = GET_CODE (op);
13661 switch (GET_CODE (XEXP (op, 1)))
13669 *amountp = INTVAL (XEXP (op, 1));
13673 gcc_unreachable ();
13679 gcc_assert (*amountp != -1);
13680 *amountp = 32 - *amountp;
13683 /* Fall through. */
13689 mnem = arm_shift_nmem (code);
13693 /* We never have to worry about the amount being other than a
13694 power of 2, since this case can never be reloaded from a reg. */
13695 gcc_assert (*amountp != -1);
13696 *amountp = int_log2 (*amountp);
13697 return ARM_LSL_NAME;
13700 gcc_unreachable ();
13703 if (*amountp != -1)
13705 /* This is not 100% correct, but follows from the desire to merge
13706 multiplication by a power of 2 with the recognizer for a
13707 shift. >=32 is not a valid shift for "lsl", so we must try and
13708 output a shift that produces the correct arithmetical result.
13709 Using lsr #32 is identical except for the fact that the carry bit
13710 is not set correctly if we set the flags; but we never use the
13711 carry bit from such an operation, so we can ignore that. */
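/* For example (illustrative): (mult x 4) is output as "lsl #2", an
   over-wide lsl such as #34 is output as "lsr #32" (which also
   yields zero), and a rotate by #33 is reduced modulo 32 to
   "ror #1".  */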
13712 if (code == ROTATERT)
13713 /* Rotate is just modulo 32. */
13715 else if (*amountp != (*amountp & 31))
13717 if (code == ASHIFT)
13722 /* Shifts of 0 are no-ops. */
13730 /* Obtain the shift from the POWER of two. */
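/* Illustratively: int_log2 (8) == 3 and int_log2 (1) == 0.  The
   argument is assumed to be an exact power of two (see the assertion
   below).  */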
13732 static HOST_WIDE_INT
13733 int_log2 (HOST_WIDE_INT power)
13735 HOST_WIDE_INT shift = 0;
13737 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13739 gcc_assert (shift <= 31);
13746 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13747 because /bin/as is horribly restrictive. The judgement about
13748 whether or not each character is 'printable' (and can be output as
13749 is) or not (and must be printed with an octal escape) must be made
13750 with reference to the *host* character set -- the situation is
13751 similar to that discussed in the comments above pp_c_char in
13752 c-pretty-print.c. */
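/* Illustrative output (assuming an ASCII host): the bytes 'H', 'i',
   '\n' are emitted as

	.ascii	"Hi\012"

   with the unprintable newline escaped in octal.  */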
13754 #define MAX_ASCII_LEN 51
13757 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13760 int len_so_far = 0;
13762 fputs ("\t.ascii\t\"", stream);
13764 for (i = 0; i < len; i++)
13768 if (len_so_far >= MAX_ASCII_LEN)
13770 fputs ("\"\n\t.ascii\t\"", stream);
13776 if (c == '\\' || c == '\"')
13778 putc ('\\', stream);
13786 fprintf (stream, "\\%03o", c);
13791 fputs ("\"\n", stream);
13794 /* Compute the register save mask for registers 0 through 12
13795 inclusive. This code is used by arm_compute_save_reg_mask. */
13797 static unsigned long
13798 arm_compute_save_reg0_reg12_mask (void)
13800 unsigned long func_type = arm_current_func_type ();
13801 unsigned long save_reg_mask = 0;
13804 if (IS_INTERRUPT (func_type))
13806 unsigned int max_reg;
13807 /* Interrupt functions must not corrupt any registers,
13808 even call clobbered ones. If this is a leaf function
13809 we can just examine the registers used by the RTL, but
13810 otherwise we have to assume that whatever function is
13811 called might clobber anything, and so we have to save
13812 all the call-clobbered registers as well. */
13813 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13814 /* FIQ handlers have registers r8 - r12 banked, so
13815 we only need to check r0 - r7.  Normal ISRs only
13816 bank r14 and r15, so we must check up to r12.
13817 r13 is the stack pointer which is always preserved,
13818 so we do not need to consider it here. */
13823 for (reg = 0; reg <= max_reg; reg++)
13824 if (df_regs_ever_live_p (reg)
13825 || (! current_function_is_leaf && call_used_regs[reg]))
13826 save_reg_mask |= (1 << reg);
13828 /* Also save the pic base register if necessary. */
13830 && !TARGET_SINGLE_PIC_BASE
13831 && arm_pic_register != INVALID_REGNUM
13832 && crtl->uses_pic_offset_table)
13833 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13835 else if (IS_VOLATILE(func_type))
13837 /* For noreturn functions we historically omitted register saves
13838 altogether. However this really messes up debugging. As a
13839 compromise save just the frame pointers. Combined with the link
13840 register saved elsewhere this should be sufficient to get
13841    a backtrace.  */
13842 if (frame_pointer_needed)
13843 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13844 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13845 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13846 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13847 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13851 /* In the normal case we only need to save those registers
13852 which are call saved and which are used by this function. */
13853 for (reg = 0; reg <= 11; reg++)
13854 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13855 save_reg_mask |= (1 << reg);
13857 /* Handle the frame pointer as a special case. */
13858 if (frame_pointer_needed)
13859 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13861 /* If we aren't loading the PIC register,
13862 don't stack it even though it may be live. */
13864 && !TARGET_SINGLE_PIC_BASE
13865 && arm_pic_register != INVALID_REGNUM
13866 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13867 || crtl->uses_pic_offset_table))
13868 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13870 /* The prologue will copy SP into R0, so save it. */
13871 if (IS_STACKALIGN (func_type))
13872 save_reg_mask |= 1;
13875 /* Save registers so the exception handler can modify them. */
13876 if (crtl->calls_eh_return)
13882 reg = EH_RETURN_DATA_REGNO (i);
13883 if (reg == INVALID_REGNUM)
13885 save_reg_mask |= 1 << reg;
13889 return save_reg_mask;
13893 /* Compute the number of bytes used to store the static chain register on the
13894 stack, above the stack frame. We need to know this accurately to get the
13895 alignment of the rest of the stack frame correct. */
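/* Illustratively: only a nested, APCS-frame ARM function that needs
   a frame pointer, has r3 live on entry, and pushes no pretend args
   reserves these 4 bytes; in every other case the result is 0,
   mirroring the condition below.  */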
13897 static int arm_compute_static_chain_stack_bytes (void)
13899 unsigned long func_type = arm_current_func_type ();
13900 int static_chain_stack_bytes = 0;
13902 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13903     && IS_NESTED (func_type)
13904     && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13905 static_chain_stack_bytes = 4;
13907 return static_chain_stack_bytes;
13911 /* Compute a bit mask of which registers need to be
13912 saved on the stack for the current function.
13913 This is used by arm_get_frame_offsets, which may add extra registers. */
13915 static unsigned long
13916 arm_compute_save_reg_mask (void)
13918 unsigned int save_reg_mask = 0;
13919 unsigned long func_type = arm_current_func_type ();
13922 if (IS_NAKED (func_type))
13923 /* This should never really happen. */
13926 /* If we are creating a stack frame, then we must save the frame pointer,
13927 IP (which will hold the old stack pointer), LR and the PC. */
13928 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13930 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13933 | (1 << PC_REGNUM);
13935 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13937 /* Decide if we need to save the link register.
13938 Interrupt routines have their own banked link register,
13939 so they never need to save it.
13940 Otherwise if we do not use the link register we do not need to save
13941 it. If we are pushing other registers onto the stack however, we
13942 can save an instruction in the epilogue by pushing the link register
13943 now and then popping it back into the PC. This incurs extra memory
13944 accesses though, so we only do it when optimizing for size, and only
13945 if we know that we will not need a fancy return sequence. */
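/* For example (illustrative): when optimizing for size, a function
   that already pushes r4 can fold its return into the pop:

	push	{r4, lr}
	...
	pop	{r4, pc}

   avoiding a separate "bx lr" in the epilogue.  */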
13946 if (df_regs_ever_live_p (LR_REGNUM)
13949 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13950 && !crtl->calls_eh_return))
13951 save_reg_mask |= 1 << LR_REGNUM;
13953 if (cfun->machine->lr_save_eliminated)
13954 save_reg_mask &= ~ (1 << LR_REGNUM);
13956 if (TARGET_REALLY_IWMMXT
13957 && ((bit_count (save_reg_mask)
13958 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13959 arm_compute_static_chain_stack_bytes())
13962 /* The total number of registers that are going to be pushed
13963 onto the stack is odd. We need to ensure that the stack
13964 is 64-bit aligned before we start to save iWMMXt registers,
13965 and also before we start to create locals. (A local variable
13966 might be a double or long long which we will load/store using
13967 an iWMMXt instruction). Therefore we need to push another
13968 ARM register, so that the stack will be 64-bit aligned. We
13969 try to avoid using the arg registers (r0 - r3) as they might be
13970 used to pass values in a tail call. */
13971 for (reg = 4; reg <= 12; reg++)
13972 if ((save_reg_mask & (1 << reg)) == 0)
13976 save_reg_mask |= (1 << reg);
13979 cfun->machine->sibcall_blocked = 1;
13980 save_reg_mask |= (1 << 3);
13984 /* We may need to push an additional register for use initializing the
13985 PIC base register. */
13986 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13987 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13989 reg = thumb_find_work_register (1 << 4);
13990 if (!call_used_regs[reg])
13991 save_reg_mask |= (1 << reg);
13994 return save_reg_mask;
13998 /* Compute a bit mask of which registers need to be
13999 saved on the stack for the current function. */
14000 static unsigned long
14001 thumb1_compute_save_reg_mask (void)
14003 unsigned long mask;
14007 for (reg = 0; reg < 12; reg ++)
14008 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14012 && !TARGET_SINGLE_PIC_BASE
14013 && arm_pic_register != INVALID_REGNUM
14014 && crtl->uses_pic_offset_table)
14015 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14017 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14018 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14019 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14021 /* LR will also be pushed if any lo regs are pushed. */
14022 if (mask & 0xff || thumb_force_lr_save ())
14023 mask |= (1 << LR_REGNUM);
14025 /* Make sure we have a low work register if we need one.
14026 We will need one if we are going to push a high register,
14027 but we are not currently intending to push a low register. */
14028 if ((mask & 0xff) == 0
14029 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14031 /* Use thumb_find_work_register to choose which register
14032 we will use. If the register is live then we will
14033 have to push it. Use LAST_LO_REGNUM as our fallback
14034 choice for the register to select. */
14035 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14036 /* Make sure the register returned by thumb_find_work_register is
14037 not part of the return value. */
14038 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14039 reg = LAST_LO_REGNUM;
14041 if (! call_used_regs[reg])
14045 /* The 504 below is 8 bytes less than 512 because there are two possible
14046 alignment words. We can't tell here if they will be present or not so we
14047 have to play it safe and assume that they are. */
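/* That is (illustrative arithmetic): 512 - 2 * 4 == 504, so a frame
   of at least this size forces a low register to be kept available
   for the stack decrement, as arranged below.  */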
14048 if ((CALLER_INTERWORKING_SLOT_SIZE
14049      + ROUND_UP_WORD (get_frame_size ())
14050      + crtl->outgoing_args_size) >= 504)
14052 /* This is the same as the code in thumb1_expand_prologue() which
14053 determines which register to use for stack decrement. */
14054 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14055 if (mask & (1 << reg))
14058 if (reg > LAST_LO_REGNUM)
14060 /* Make sure we have a register available for stack decrement. */
14061 mask |= 1 << LAST_LO_REGNUM;
14069 /* Return the number of bytes required to save VFP registers. */
14071 arm_get_vfp_saved_size (void)
14073 unsigned int regno;
14078 /* Space for saved VFP registers. */
14079 if (TARGET_HARD_FLOAT && TARGET_VFP)
14082 for (regno = FIRST_VFP_REGNUM;
14083 regno < LAST_VFP_REGNUM;
14086 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14087 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14091 /* Workaround ARM10 VFPr1 bug. */
14092 if (count == 2 && !arm_arch6)
14094 saved += count * 8;
14103 if (count == 2 && !arm_arch6)
14105 saved += count * 8;
14112 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14113 everything bar the final return instruction. */
14115 output_return_instruction (rtx operand, int really_return, int reverse)
14117 char conditional[10];
14120 unsigned long live_regs_mask;
14121 unsigned long func_type;
14122 arm_stack_offsets *offsets;
14124 func_type = arm_current_func_type ();
14126 if (IS_NAKED (func_type))
14129 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14131 /* If this function was declared non-returning, and we have
14132 found a tail call, then we have to trust that the called
14133 function won't return. */
14138 /* Otherwise, trap an attempted return by aborting. */
14140 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14142 assemble_external_libcall (ops[1]);
14143 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14149 gcc_assert (!cfun->calls_alloca || really_return);
14151 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14153 cfun->machine->return_used_this_function = 1;
14155 offsets = arm_get_frame_offsets ();
14156 live_regs_mask = offsets->saved_regs_mask;
14158 if (live_regs_mask)
14160 const char * return_reg;
14162 /* If we do not have any special requirements for function exit
14163 (e.g. interworking) then we can load the return address
14164 directly into the PC. Otherwise we must load it into LR. */
14166 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14167 return_reg = reg_names[PC_REGNUM];
14169 return_reg = reg_names[LR_REGNUM];
14171 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14173 /* There are three possible reasons for the IP register
14174 being saved. 1) a stack frame was created, in which case
14175 IP contains the old stack pointer, or 2) an ISR routine
14176 corrupted it, or 3) it was saved to align the stack on
14177 iWMMXt.  In case 1, restore IP into SP, otherwise just
14178    pop it.  */
14179 if (frame_pointer_needed)
14181 live_regs_mask &= ~ (1 << IP_REGNUM);
14182 live_regs_mask |= (1 << SP_REGNUM);
14185 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14188 /* On some ARM architectures it is faster to use LDR rather than
14189 LDM to load a single register. On other architectures, the
14190 cost is the same. In 26 bit mode, or for exception handlers,
14191 we have to use LDM to load the PC so that the CPSR is also
14192    restored.  */
14193 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14194 if (live_regs_mask == (1U << reg))
14197 if (reg <= LAST_ARM_REGNUM
14198 && (reg != LR_REGNUM
14200 || ! IS_INTERRUPT (func_type)))
14202 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14203 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14210 /* Generate the load multiple instruction to restore the
14211 registers. Note we can get here, even if
14212 frame_pointer_needed is true, but only if sp already
14213 points to the base of the saved core registers. */
14214 if (live_regs_mask & (1 << SP_REGNUM))
14216 unsigned HOST_WIDE_INT stack_adjust;
14218 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14219 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14221 if (stack_adjust && arm_arch5 && TARGET_ARM)
14222 if (TARGET_UNIFIED_ASM)
14223 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14225 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14228 /* If we can't use ldmib (SA110 bug),
14229 then try to pop r3 instead. */
14231 live_regs_mask |= 1 << 3;
14233 if (TARGET_UNIFIED_ASM)
14234 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14236 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14240 if (TARGET_UNIFIED_ASM)
14241 sprintf (instr, "pop%s\t{", conditional);
14243 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14245 p = instr + strlen (instr);
14247 for (reg = 0; reg <= SP_REGNUM; reg++)
14248 if (live_regs_mask & (1 << reg))
14250 int l = strlen (reg_names[reg]);
14256 memcpy (p, ", ", 2);
14260 memcpy (p, "%|", 2);
14261 memcpy (p + 2, reg_names[reg], l);
14265 if (live_regs_mask & (1 << LR_REGNUM))
14267 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14268 /* If returning from an interrupt, restore the CPSR. */
14269 if (IS_INTERRUPT (func_type))
14276 output_asm_insn (instr, & operand);
14278 /* See if we need to generate an extra instruction to
14279 perform the actual function return. */
14281 && func_type != ARM_FT_INTERWORKED
14282 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14284 /* The return has already been handled
14285 by loading the LR into the PC. */
14292 switch ((int) ARM_FUNC_TYPE (func_type))
14296 /* ??? This is wrong for unified assembly syntax. */
14297 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14300 case ARM_FT_INTERWORKED:
14301 sprintf (instr, "bx%s\t%%|lr", conditional);
14304 case ARM_FT_EXCEPTION:
14305 /* ??? This is wrong for unified assembly syntax. */
14306 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14310 /* Use bx if it's available. */
14311 if (arm_arch5 || arm_arch4t)
14312 sprintf (instr, "bx%s\t%%|lr", conditional);
14314 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14318 output_asm_insn (instr, & operand);
14324 /* Write the function name into the code section, directly preceding
14325 the function prologue.
14327 Code will be output similar to this:
14328      t0
14329          .ascii "arm_poke_function_name", 0
14330          .align
14331      t1
14332          .word 0xff000000 + (t1 - t0)
14333      arm_poke_function_name
14334          mov     ip, sp
14335          stmfd   sp!, {fp, ip, lr, pc}
14336          sub     fp, ip, #4
14338 When performing a stack backtrace, code can inspect the value
14339 of 'pc' stored at 'fp' + 0. If the trace function then looks
14340 at location pc - 12 and the top 8 bits are set, then we know
14341 that there is a function name embedded immediately preceding this
14342 location, whose length is given by ((pc[-3]) & ~0xff000000).
14344 We assume that pc is declared as a pointer to an unsigned long.
14346 It is of no benefit to output the function name if we are assembling
14347 a leaf function. These function types will not contain a stack
14348 backtrace structure, therefore it is not possible to determine the
14349    function name.  */
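/* A sketch (added illustration, not part of GCC) of how an external
   backtracer might use the marker word described above; "fp" is
   assumed to hold the frame pointer of the frame being inspected:

	unsigned long *pc = *(unsigned long **) fp;
	if ((pc[-3] & 0xff000000) == 0xff000000)
	  {
	    unsigned long len = pc[-3] & ~0xff000000;
	    const char *name = (const char *) pc - 12 - len;
	  }
*/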
14351 arm_poke_function_name (FILE *stream, const char *name)
14353 unsigned long alignlength;
14354 unsigned long length;
14357 length = strlen (name) + 1;
14358 alignlength = ROUND_UP_WORD (length);
14360 ASM_OUTPUT_ASCII (stream, name, length);
14361 ASM_OUTPUT_ALIGN (stream, 2);
14362 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14363 assemble_aligned_integer (UNITS_PER_WORD, x);
14366 /* Place some comments into the assembler stream
14367 describing the current function. */
14369 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14371 unsigned long func_type;
14375 thumb1_output_function_prologue (f, frame_size);
14379 /* Sanity check. */
14380 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14382 func_type = arm_current_func_type ();
14384 switch ((int) ARM_FUNC_TYPE (func_type))
14387 case ARM_FT_NORMAL:
14389 case ARM_FT_INTERWORKED:
14390 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14393 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14396 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14398 case ARM_FT_EXCEPTION:
14399 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14403 if (IS_NAKED (func_type))
14404 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14406 if (IS_VOLATILE (func_type))
14407 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14409 if (IS_NESTED (func_type))
14410 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14411 if (IS_STACKALIGN (func_type))
14412 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14414 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14416 crtl->args.pretend_args_size, frame_size);
14418 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14419 frame_pointer_needed,
14420 cfun->machine->uses_anonymous_args);
14422 if (cfun->machine->lr_save_eliminated)
14423 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14425 if (crtl->calls_eh_return)
14426 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14431 arm_output_epilogue (rtx sibling)
14434 unsigned long saved_regs_mask;
14435 unsigned long func_type;
14436 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14437 frame that is $fp + 4 for a non-variadic function. */
14438 int floats_offset = 0;
14440 FILE * f = asm_out_file;
14441 unsigned int lrm_count = 0;
14442 int really_return = (sibling == NULL);
14444 arm_stack_offsets *offsets;
14446 /* If we have already generated the return instruction
14447 then it is futile to generate anything else. */
14448 if (use_return_insn (FALSE, sibling)
14449     && (cfun->machine->return_used_this_function != 0))
14452 func_type = arm_current_func_type ();
14454 if (IS_NAKED (func_type))
14455 /* Naked functions don't have epilogues. */
14458 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14462 /* A volatile function should never return. Call abort. */
14463 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14464 assemble_external_libcall (op);
14465 output_asm_insn ("bl\t%a0", &op);
14470 /* If we are throwing an exception, then we really must be doing a
14471 return, so we can't tail-call. */
14472 gcc_assert (!crtl->calls_eh_return || really_return);
14474 offsets = arm_get_frame_offsets ();
14475 saved_regs_mask = offsets->saved_regs_mask;
14478 lrm_count = bit_count (saved_regs_mask);
14480 floats_offset = offsets->saved_args;
14481 /* Compute how far away the floats will be. */
14482 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14483 if (saved_regs_mask & (1 << reg))
14484 floats_offset += 4;
14486 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14488 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14489 int vfp_offset = offsets->frame;
14491 if (TARGET_FPA_EMU2)
14493 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14494 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14496 floats_offset += 12;
14497 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14498 reg, FP_REGNUM, floats_offset - vfp_offset);
14503 start_reg = LAST_FPA_REGNUM;
14505 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14507 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14509 floats_offset += 12;
14511 /* We can't unstack more than four registers at once. */
14512 if (start_reg - reg == 3)
14514 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14515 reg, FP_REGNUM, floats_offset - vfp_offset);
14516 start_reg = reg - 1;
14521 if (reg != start_reg)
14522 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14523 reg + 1, start_reg - reg,
14524 FP_REGNUM, floats_offset - vfp_offset);
14525 start_reg = reg - 1;
14529 /* Just in case the last register checked also needs unstacking. */
14530 if (reg != start_reg)
14531 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14532 reg + 1, start_reg - reg,
14533 FP_REGNUM, floats_offset - vfp_offset);
14536 if (TARGET_HARD_FLOAT && TARGET_VFP)
14540 /* The fldmd insns do not have base+offset addressing
14541 modes, so we use IP to hold the address. */
14542 saved_size = arm_get_vfp_saved_size ();
14544 if (saved_size > 0)
14546 floats_offset += saved_size;
14547 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14548 FP_REGNUM, floats_offset - vfp_offset);
14550 start_reg = FIRST_VFP_REGNUM;
14551 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14553 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14554 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14556 if (start_reg != reg)
14557 vfp_output_fldmd (f, IP_REGNUM,
14558 (start_reg - FIRST_VFP_REGNUM) / 2,
14559 (reg - start_reg) / 2);
14560 start_reg = reg + 2;
14563 if (start_reg != reg)
14564 vfp_output_fldmd (f, IP_REGNUM,
14565 (start_reg - FIRST_VFP_REGNUM) / 2,
14566 (reg - start_reg) / 2);
14571 /* The frame pointer is guaranteed to be non-double-word aligned.
14572 This is because it is set to (old_stack_pointer - 4) and the
14573 old_stack_pointer was double word aligned. Thus the offset to
14574 the iWMMXt registers to be loaded must also be non-double-word
14575 sized, so that the resultant address *is* double-word aligned.
14576 We can ignore floats_offset since that was already included in
14577 the live_regs_mask. */
14578 lrm_count += (lrm_count % 2 ? 2 : 1);
14580 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14581 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14583 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14584 reg, FP_REGNUM, lrm_count * 4);
14589 /* saved_regs_mask should contain the IP, which at the time of stack
14590 frame generation actually contains the old stack pointer. So a
14591 quick way to unwind the stack is just pop the IP register directly
14592 into the stack pointer. */
14593 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14594 saved_regs_mask &= ~ (1 << IP_REGNUM);
14595 saved_regs_mask |= (1 << SP_REGNUM);
14597 /* There are two registers left in saved_regs_mask - LR and PC. We
14598 only need to restore the LR register (the return address), but to
14599 save time we can load it directly into the PC, unless we need a
14600 special function exit sequence, or we are not really returning. */
14602 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14603 && !crtl->calls_eh_return)
14604 /* Delete the LR from the register mask, so that the LR on
14605 the stack is loaded into the PC in the register mask. */
14606 saved_regs_mask &= ~ (1 << LR_REGNUM);
14608 saved_regs_mask &= ~ (1 << PC_REGNUM);
14610 /* We must use SP as the base register, because SP is one of the
14611 registers being restored. If an interrupt or page fault
14612 happens in the ldm instruction, the SP might or might not
14613 have been restored. That would be bad, as then SP will no
14614 longer indicate the safe area of stack, and we can get stack
14615 corruption. Using SP as the base register means that it will
14616 be reset correctly to the original value, should an interrupt
14617 occur. If the stack pointer already points at the right
14618 place, then omit the subtraction. */
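/* Illustrative output when the subtraction is needed, for a mask of
   {r4, r5, fp, sp, pc}:

	sub	sp, fp, #20
	ldmfd	sp, {r4, r5, fp, sp, pc}

   (20 == 4 * 5 saved registers; the SP in the register list rules
   out writeback).  */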
14619 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14620 || cfun->calls_alloca)
14621 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14622 4 * bit_count (saved_regs_mask));
14623 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14625 if (IS_INTERRUPT (func_type))
14626 /* Interrupt handlers will have pushed the
14627 IP onto the stack, so restore it now. */
14628 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14632 /* This branch is executed for ARM mode (non-apcs frames) and
14633 Thumb-2 mode. Frame layout is essentially the same for those
14634 cases, except that in ARM mode frame pointer points to the
14635 first saved register, while in Thumb-2 mode the frame pointer points
14636 to the last saved register.
14638 It is possible to make frame pointer point to last saved
14639 register in both cases, and remove some conditionals below.
14640 That means that fp setup in prologue would be just "mov fp, sp"
14641 and sp restore in epilogue would be just "mov sp, fp", whereas
14642 now we have to use add/sub in those cases. However, the value
14643 of that would be marginal, as both mov and add/sub are 32-bit
14644 in ARM mode, and it would require extra conditionals
14645 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14646 (where frame pointer is required to point at first register)
14647 and ARM-non-apcs-frame. Therefore, such change is postponed
14648 until a real need arises.  */
14649 unsigned HOST_WIDE_INT amount;
14651 /* Restore stack pointer if necessary. */
14652 if (TARGET_ARM && frame_pointer_needed)
14654 operands[0] = stack_pointer_rtx;
14655 operands[1] = hard_frame_pointer_rtx;
14657 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14658 output_add_immediate (operands);
14662 if (frame_pointer_needed)
14664 /* For Thumb-2 restore sp from the frame pointer.
14665 Operand restrictions mean we have to increment FP, then copy
14666    to SP.  */
14667 amount = offsets->locals_base - offsets->saved_regs;
14668 operands[0] = hard_frame_pointer_rtx;
14672 unsigned long count;
14673 operands[0] = stack_pointer_rtx;
14674 amount = offsets->outgoing_args - offsets->saved_regs;
14675 /* pop call clobbered registers if it avoids a
14676 separate stack adjustment. */
14677 count = offsets->saved_regs - offsets->saved_args;
14680 && !crtl->calls_eh_return
14681 && bit_count(saved_regs_mask) * 4 == count
14682 && !IS_INTERRUPT (func_type)
14683 && !crtl->tail_call_emit)
14685 unsigned long mask;
14686 /* Preserve return values, of any size. */
14687 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14689 mask &= ~saved_regs_mask;
14691 while (bit_count (mask) * 4 > amount)
14693 while ((mask & (1 << reg)) == 0)
14695 mask &= ~(1 << reg);
14697 if (bit_count (mask) * 4 == amount) {
14699 saved_regs_mask |= mask;
14706 operands[1] = operands[0];
14707 operands[2] = GEN_INT (amount);
14708 output_add_immediate (operands);
14710 if (frame_pointer_needed)
14711 asm_fprintf (f, "\tmov\t%r, %r\n",
14712 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14715 if (TARGET_FPA_EMU2)
14717 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14718 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14719 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14724 start_reg = FIRST_FPA_REGNUM;
14726 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14728 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14730 if (reg - start_reg == 3)
14732 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14733 start_reg, SP_REGNUM);
14734 start_reg = reg + 1;
14739 if (reg != start_reg)
14740 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14741 start_reg, reg - start_reg,
14744 start_reg = reg + 1;
14748 /* Just in case the last register checked also needs unstacking. */
14749 if (reg != start_reg)
14750 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14751 start_reg, reg - start_reg, SP_REGNUM);
14754 if (TARGET_HARD_FLOAT && TARGET_VFP)
14756 int end_reg = LAST_VFP_REGNUM + 1;
14758 /* Scan the registers in reverse order. We need to match
14759 any groupings made in the prologue and generate matching
14760    pops.  */
14761 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14763 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14764 && (!df_regs_ever_live_p (reg + 1)
14765 || call_used_regs[reg + 1]))
14767 if (end_reg > reg + 2)
14768 vfp_output_fldmd (f, SP_REGNUM,
14769 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14770 (end_reg - (reg + 2)) / 2);
14774 if (end_reg > reg + 2)
14775 vfp_output_fldmd (f, SP_REGNUM, 0,
14776 (end_reg - (reg + 2)) / 2);
14780 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14781 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14782 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14784 /* If we can, restore the LR into the PC. */
14785 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14786 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14787 && !IS_STACKALIGN (func_type)
14789 && crtl->args.pretend_args_size == 0
14790 && saved_regs_mask & (1 << LR_REGNUM)
14791 && !crtl->calls_eh_return)
14793 saved_regs_mask &= ~ (1 << LR_REGNUM);
14794 saved_regs_mask |= (1 << PC_REGNUM);
14795 rfe = IS_INTERRUPT (func_type);
14800 /* Load the registers off the stack. If we only have one register
14801 to load use the LDR instruction - it is faster. For Thumb-2
14802 always use pop and the assembler will pick the best instruction.  */
14803 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14804 && !IS_INTERRUPT(func_type))
14806 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14808 else if (saved_regs_mask)
14810 if (saved_regs_mask & (1 << SP_REGNUM))
14811 /* Note - write back to the stack register is not enabled
14812 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14813 in the list of registers and if we add writeback the
14814 instruction becomes UNPREDICTABLE. */
14815 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14817 else if (TARGET_ARM)
14818 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14821 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14824 if (crtl->args.pretend_args_size)
14826 /* Unwind the pre-pushed regs. */
14827 operands[0] = operands[1] = stack_pointer_rtx;
14828 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14829 output_add_immediate (operands);
14833 /* We may have already restored PC directly from the stack. */
14834 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14837 /* Stack adjustment for exception handler. */
14838 if (crtl->calls_eh_return)
14839 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14840 ARM_EH_STACKADJ_REGNUM);
14842 /* Generate the return instruction. */
14843 switch ((int) ARM_FUNC_TYPE (func_type))
14847 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14850 case ARM_FT_EXCEPTION:
14851 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14854 case ARM_FT_INTERWORKED:
14855 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14859 if (IS_STACKALIGN (func_type))
14861 /* See comment in arm_expand_prologue. */
14862 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14864 if (arm_arch5 || arm_arch4t)
14865 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14867 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14875 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14876 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14878 arm_stack_offsets *offsets;
14884 /* Emit any call-via-reg trampolines that are needed for v4t support
14885 of call_reg and call_value_reg type insns. */
14886 for (regno = 0; regno < LR_REGNUM; regno++)
14888 rtx label = cfun->machine->call_via[regno];
14892 switch_to_section (function_section (current_function_decl));
14893 targetm.asm_out.internal_label (asm_out_file, "L",
14894 CODE_LABEL_NUMBER (label));
14895 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14899 /* ??? Probably not safe to set this here, since it assumes that a
14900 function will be emitted as assembly immediately after we generate
14901 RTL for it. This does not happen for inline functions. */
14902 cfun->machine->return_used_this_function = 0;
14904 else /* TARGET_32BIT */
14906 /* We need to take into account any stack-frame rounding. */
14907 offsets = arm_get_frame_offsets ();
14909 gcc_assert (!use_return_insn (FALSE, NULL)
14910 || (cfun->machine->return_used_this_function != 0)
14911 || offsets->saved_regs == offsets->outgoing_args
14912 || frame_pointer_needed);
14914 /* Reset the ARM-specific per-function variables. */
14915 after_arm_reorg = 0;
14919 /* Generate and emit an insn that we will recognize as a push_multi.
14920 Unfortunately, since this insn does not reflect very well the actual
14921 semantics of the operation, we need to annotate the insn for the benefit
14922 of DWARF2 frame unwind information. */
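/* Usage examples from later in this file (see arm_expand_prologue):

	emit_multi_reg_push (1 << IP_REGNUM);

   pushes just ip for interrupt handlers, while

	emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf);

   pushes the tail of {r0-r3} when anonymous arguments are in use.  */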
14924 emit_multi_reg_push (unsigned long mask)
14927 int num_dwarf_regs;
14931 int dwarf_par_index;
14934 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14935 if (mask & (1 << i))
14938 gcc_assert (num_regs && num_regs <= 16);
14940 /* We don't record the PC in the dwarf frame information. */
14941 num_dwarf_regs = num_regs;
14942 if (mask & (1 << PC_REGNUM))
14945 /* For the body of the insn we are going to generate an UNSPEC in
14946 parallel with several USEs. This allows the insn to be recognized
14947 by the push_multi pattern in the arm.md file.
14949 The body of the insn looks something like this:
14952 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14953 (const_int:SI <num>)))
14954 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14960 For the frame note however, we try to be more explicit and actually
14961 show each register being stored into the stack frame, plus a (single)
14962 decrement of the stack pointer. We do it this way in order to be
14963 friendly to the stack unwinding code, which only wants to see a single
14964 stack decrement per instruction. The RTL we generate for the note looks
14965 something like this:
14968 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14969 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14970 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14971 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14975 FIXME: In an ideal world the PRE_MODIFY would not exist and
14976 instead we'd have a parallel expression detailing all
14977 the stores to the various memory addresses so that debug
14978 information is more up-to-date. Remember however while writing
14979 this to take care of the constraints with the push instruction.
14981 Note also that this has to be taken care of for the VFP registers.
14983 For more see PR43399. */
14985 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14986 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14987 dwarf_par_index = 1;
14989 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14991 if (mask & (1 << i))
14993 reg = gen_rtx_REG (SImode, i);
14995 XVECEXP (par, 0, 0)
14996 = gen_rtx_SET (VOIDmode,
14999 gen_rtx_PRE_MODIFY (Pmode,
15002 (stack_pointer_rtx,
15005 gen_rtx_UNSPEC (BLKmode,
15006 gen_rtvec (1, reg),
15007 UNSPEC_PUSH_MULT));
15009 if (i != PC_REGNUM)
15011 tmp = gen_rtx_SET (VOIDmode,
15012 gen_frame_mem (SImode, stack_pointer_rtx),
15014 RTX_FRAME_RELATED_P (tmp) = 1;
15015 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15023 for (j = 1, i++; j < num_regs; i++)
15025 if (mask & (1 << i))
15027 reg = gen_rtx_REG (SImode, i);
15029 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15031 if (i != PC_REGNUM)
15034 = gen_rtx_SET (VOIDmode,
15037 plus_constant (stack_pointer_rtx,
15040 RTX_FRAME_RELATED_P (tmp) = 1;
15041 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15048 par = emit_insn (par);
15050 tmp = gen_rtx_SET (VOIDmode,
15052 plus_constant (stack_pointer_rtx, -4 * num_regs));
15053 RTX_FRAME_RELATED_P (tmp) = 1;
15054 XVECEXP (dwarf, 0, 0) = tmp;
15056 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15061 /* Calculate the size of the return value that is passed in registers. */
15063 arm_size_return_regs (void)
15065 enum machine_mode mode;
15067 if (crtl->return_rtx != 0)
15068 mode = GET_MODE (crtl->return_rtx);
15070 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15072 return GET_MODE_SIZE (mode);
15076 emit_sfm (int base_reg, int count)
15083 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15084 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15086 reg = gen_rtx_REG (XFmode, base_reg++);
15088 XVECEXP (par, 0, 0)
15089 = gen_rtx_SET (VOIDmode,
15092 gen_rtx_PRE_MODIFY (Pmode,
15095 (stack_pointer_rtx,
15098 gen_rtx_UNSPEC (BLKmode,
15099 gen_rtvec (1, reg),
15100 UNSPEC_PUSH_MULT));
15101 tmp = gen_rtx_SET (VOIDmode,
15102 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15103 RTX_FRAME_RELATED_P (tmp) = 1;
15104 XVECEXP (dwarf, 0, 1) = tmp;
15106 for (i = 1; i < count; i++)
15108 reg = gen_rtx_REG (XFmode, base_reg++);
15109 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15111 tmp = gen_rtx_SET (VOIDmode,
15112 gen_frame_mem (XFmode,
15113 plus_constant (stack_pointer_rtx,
15116 RTX_FRAME_RELATED_P (tmp) = 1;
15117 XVECEXP (dwarf, 0, i + 1) = tmp;
15120 tmp = gen_rtx_SET (VOIDmode,
15122 plus_constant (stack_pointer_rtx, -12 * count));
15124 RTX_FRAME_RELATED_P (tmp) = 1;
15125 XVECEXP (dwarf, 0, 0) = tmp;
15127 par = emit_insn (par);
15128 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15134 /* Return true if the current function needs to save/restore LR. */
15137 thumb_force_lr_save (void)
15139 return !cfun->machine->lr_save_eliminated
15140 && (!leaf_function_p ()
15141 || thumb_far_jump_used_p ()
15142 || df_regs_ever_live_p (LR_REGNUM));
15146 /* Compute the distance from register FROM to register TO.
15147 These can be the arg pointer (26), the soft frame pointer (25),
15148 the stack pointer (13) or the hard frame pointer (11).
15149 In thumb mode r7 is used as the soft frame pointer, if needed.
15150 Typical stack layout looks like this:
15152       old stack pointer -> |    |
15153                             ----
15154                            |    | \
15155                            |    |   saved arguments for
15156                            |    |   vararg functions
15157                            |    | /
15158                              --
15159   hard FP & arg pointer -> |    | \
15160                            |    |   stack
15161                            |    |   frame
15162                            |    | /
15163                              --
15164                            |    | \
15165                            |    |   call saved
15166                            |    |   registers
15167      soft frame pointer -> |    | /
15168                              --
15169                            |    | \
15170                            |    |   local
15171                            |    |   variables
15172     locals base pointer -> |    | /
15173                              --
15174                            |    | \
15175                            |    |   outgoing
15176                            |    |   arguments
15177   current stack pointer -> |    | /
15178                              --
15180 For a given function some or all of these stack components
15181 may not be needed, giving rise to the possibility of
15182 eliminating some of the registers.
15184 The values returned by this function must reflect the behavior
15185 of arm_expand_prologue() and arm_compute_save_reg_mask().
15187 The sign of the number returned reflects the direction of stack
15188 growth, so the values are positive for all eliminations except
15189 from the soft frame pointer to the hard frame pointer.
15191 SFP may point just inside the local variables block to ensure correct
15192    alignment.  */
15195 /* Calculate stack offsets. These are used to calculate register elimination
15196 offsets and in prologue/epilogue code. Also calculates which registers
15197 should be saved. */
15199 static arm_stack_offsets *
15200 arm_get_frame_offsets (void)
15202 struct arm_stack_offsets *offsets;
15203 unsigned long func_type;
15207 HOST_WIDE_INT frame_size;
15210 offsets = &cfun->machine->stack_offsets;
15212 /* We need to know if we are a leaf function. Unfortunately, it
15213 is possible to be called after start_sequence has been called,
15214 which causes get_insns to return the insns for the sequence,
15215 not the function, which will cause leaf_function_p to return
15216 the incorrect result.
15217    Fortunately, we only need
15218 to know about leaf functions once reload has completed, and the
15219 frame size cannot be changed after that time, so we can safely
15220 use the cached value. */
15222 if (reload_completed)
15225 /* Initially this is the size of the local variables.  It will be translated
15226 into an offset once we have determined the size of preceding data. */
15227 frame_size = ROUND_UP_WORD (get_frame_size ());
15229 leaf = leaf_function_p ();
15231 /* Space for variadic functions. */
15232 offsets->saved_args = crtl->args.pretend_args_size;
15234 /* In Thumb mode this is incorrect, but never used. */
15235 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
15236     + arm_compute_static_chain_stack_bytes ();
15240 unsigned int regno;
15242 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15243 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15244 saved = core_saved;
15246 /* We know that SP will be doubleword aligned on entry, and we must
15247 preserve that condition at any subroutine call. We also require the
15248 soft frame pointer to be doubleword aligned. */
15250 if (TARGET_REALLY_IWMMXT)
15252 /* Check for the call-saved iWMMXt registers. */
15253 for (regno = FIRST_IWMMXT_REGNUM;
15254 regno <= LAST_IWMMXT_REGNUM;
15256 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15260 func_type = arm_current_func_type ();
15261 if (! IS_VOLATILE (func_type))
15263 /* Space for saved FPA registers. */
15264 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15265 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15268 /* Space for saved VFP registers. */
15269 if (TARGET_HARD_FLOAT && TARGET_VFP)
15270 saved += arm_get_vfp_saved_size ();
15273 else /* TARGET_THUMB1 */
15275 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15276 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15277 saved = core_saved;
15278 if (TARGET_BACKTRACE)
15282 /* Saved registers include the stack frame. */
15283 offsets->saved_regs = offsets->saved_args + saved
15284     + arm_compute_static_chain_stack_bytes ();
15285 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15286 /* A leaf function does not need any stack alignment if it has nothing
15287    on the stack.  */
15288 if (leaf && frame_size == 0)
15290 offsets->outgoing_args = offsets->soft_frame;
15291 offsets->locals_base = offsets->soft_frame;
15295 /* Ensure SFP has the correct alignment. */
15296 if (ARM_DOUBLEWORD_ALIGN
15297 && (offsets->soft_frame & 7))
15299 offsets->soft_frame += 4;
15300 /* Try to align stack by pushing an extra reg. Don't bother doing this
15301 when there is a stack frame as the alignment will be rolled into
15302 the normal stack adjustment. */
15303 if (frame_size + crtl->outgoing_args_size == 0)
15307 /* If it is safe to use r3, then do so. This sometimes
15308 generates better code on Thumb-2 by avoiding the need to
15309 use 32-bit push/pop instructions. */
15310 if (!crtl->tail_call_emit
15311 && arm_size_return_regs () <= 12
15312 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15317 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15319 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15328 offsets->saved_regs += 4;
15329 offsets->saved_regs_mask |= (1 << reg);
15334 offsets->locals_base = offsets->soft_frame + frame_size;
15335 offsets->outgoing_args = (offsets->locals_base
15336 + crtl->outgoing_args_size);
15338 if (ARM_DOUBLEWORD_ALIGN)
15340 /* Ensure SP remains doubleword aligned. */
15341 if (offsets->outgoing_args & 7)
15342 offsets->outgoing_args += 4;
15343 gcc_assert (!(offsets->outgoing_args & 7));
15350 /* Calculate the relative offsets for the different stack pointers. Positive
15351 offsets are in the direction of stack growth. */
15354 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15356 arm_stack_offsets *offsets;
15358 offsets = arm_get_frame_offsets ();
15360 /* OK, now we have enough information to compute the distances.
15361 There must be an entry in these switch tables for each pair
15362 of registers in ELIMINABLE_REGS, even if some of the entries
15363 seem to be redundant or useless. */
15366 case ARG_POINTER_REGNUM:
15369 case THUMB_HARD_FRAME_POINTER_REGNUM:
15372 case FRAME_POINTER_REGNUM:
15373 /* This is the reverse of the soft frame pointer
15374 to hard frame pointer elimination below. */
15375 return offsets->soft_frame - offsets->saved_args;
15377 case ARM_HARD_FRAME_POINTER_REGNUM:
15378 /* This is only non-zero in the case where the static chain register
15379 is stored above the frame. */
15380 return offsets->frame - offsets->saved_args - 4;
15382 case STACK_POINTER_REGNUM:
15383 /* If nothing has been pushed on the stack at all
15384 then this will return -4. This *is* correct! */
15385 return offsets->outgoing_args - (offsets->saved_args + 4);
15388 gcc_unreachable ();
15390 gcc_unreachable ();
15392 case FRAME_POINTER_REGNUM:
15395 case THUMB_HARD_FRAME_POINTER_REGNUM:
15398 case ARM_HARD_FRAME_POINTER_REGNUM:
15399 /* The hard frame pointer points to the top entry in the
15400 stack frame. The soft frame pointer to the bottom entry
15401 in the stack frame. If there is no stack frame at all,
15402 then they are identical. */
15404 return offsets->frame - offsets->soft_frame;
15406 case STACK_POINTER_REGNUM:
15407 return offsets->outgoing_args - offsets->soft_frame;
15410 gcc_unreachable ();
15412 gcc_unreachable ();
15415 /* You cannot eliminate from the stack pointer.
15416 In theory you could eliminate from the hard frame
15417 pointer to the stack pointer, but this will never
15418 happen, since if a stack frame is not needed the
15419 hard frame pointer will never be used. */
15420 gcc_unreachable ();
15424 /* Given FROM and TO register numbers, say whether this elimination is
15425 allowed. Frame pointer elimination is automatically handled.
15427 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15428 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15429 pointer, we must eliminate FRAME_POINTER_REGNUM into
15430 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15431 ARG_POINTER_REGNUM. */
15434 arm_can_eliminate (const int from, const int to)
15436 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15437 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15438 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15439 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15440    true);
15443 /* Emit RTL to save coprocessor registers on function entry. Returns the
15444 number of bytes pushed. */
15447 arm_save_coproc_regs(void)
15449 int saved_size = 0;
15451 unsigned start_reg;
15454 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15455 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15457 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15458 insn = gen_rtx_MEM (V2SImode, insn);
15459 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15460 RTX_FRAME_RELATED_P (insn) = 1;
15464 /* Save any floating point call-saved registers used by this
15465    function.  */
15466 if (TARGET_FPA_EMU2)
15468 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15469 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15471 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15472 insn = gen_rtx_MEM (XFmode, insn);
15473 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15474 RTX_FRAME_RELATED_P (insn) = 1;
15480 start_reg = LAST_FPA_REGNUM;
15482 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15484 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15486 if (start_reg - reg == 3)
15488 insn = emit_sfm (reg, 4);
15489 RTX_FRAME_RELATED_P (insn) = 1;
15491 start_reg = reg - 1;
15496 if (start_reg != reg)
15498 insn = emit_sfm (reg + 1, start_reg - reg);
15499 RTX_FRAME_RELATED_P (insn) = 1;
15500 saved_size += (start_reg - reg) * 12;
15502 start_reg = reg - 1;
15506 if (start_reg != reg)
15508 insn = emit_sfm (reg + 1, start_reg - reg);
15509 saved_size += (start_reg - reg) * 12;
15510 RTX_FRAME_RELATED_P (insn) = 1;
15513 if (TARGET_HARD_FLOAT && TARGET_VFP)
15515 start_reg = FIRST_VFP_REGNUM;
15517 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15519 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15520 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15522 if (start_reg != reg)
15523 saved_size += vfp_emit_fstmd (start_reg,
15524 (reg - start_reg) / 2);
15525 start_reg = reg + 2;
15528 if (start_reg != reg)
15529 saved_size += vfp_emit_fstmd (start_reg,
15530 (reg - start_reg) / 2);
15536 /* Set the Thumb frame pointer from the stack pointer. */
15539 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15541 HOST_WIDE_INT amount;
15544 amount = offsets->outgoing_args - offsets->locals_base;
15546 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15547 stack_pointer_rtx, GEN_INT (amount)));
15550 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15551 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15552 expects the first two operands to be the same. */
15555 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15557 hard_frame_pointer_rtx));
15561 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15562 hard_frame_pointer_rtx,
15563 stack_pointer_rtx));
15565 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15566 plus_constant (stack_pointer_rtx, amount));
15567 RTX_FRAME_RELATED_P (dwarf) = 1;
15568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15571 RTX_FRAME_RELATED_P (insn) = 1;
15574 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15575    function.  */
15576 void
15577 arm_expand_prologue (void)
15582 unsigned long live_regs_mask;
15583 unsigned long func_type;
15585 int saved_pretend_args = 0;
15586 int saved_regs = 0;
15587 unsigned HOST_WIDE_INT args_to_push;
15588 arm_stack_offsets *offsets;
15590 func_type = arm_current_func_type ();
15592 /* Naked functions don't have prologues. */
15593 if (IS_NAKED (func_type))
15596 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15597 args_to_push = crtl->args.pretend_args_size;
15599 /* Compute which registers we will have to save onto the stack. */
15600 offsets = arm_get_frame_offsets ();
15601 live_regs_mask = offsets->saved_regs_mask;
15603 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15605 if (IS_STACKALIGN (func_type))
15610 /* Handle a word-aligned stack pointer.  We generate the following:
15612 	mov r0, sp
15613 	bic r1, r0, #7
15614 	mov sp, r1
15615 	<save and restore r0 in normal prologue/epilogue>
15616 	mov sp, r0
15619    The unwinder doesn't need to know about the stack realignment.
15620    Just tell it we saved SP in r0.  */
15621 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15623 r0 = gen_rtx_REG (SImode, 0);
15624 r1 = gen_rtx_REG (SImode, 1);
15625 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15626 compiler won't choke. */
15627 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15628 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15629 insn = gen_movsi (r0, stack_pointer_rtx);
15630 RTX_FRAME_RELATED_P (insn) = 1;
15631 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15633 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15634 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15637 /* For APCS frames, if the IP register is clobbered
15638    when creating the frame, save that register in a special
15639    way.  */
15640 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15642 if (IS_INTERRUPT (func_type))
15644 /* Interrupt functions must not corrupt any registers.
15645 Creating a frame pointer, however, corrupts the IP
15646 register, so we must push it first. */
15647 insn = emit_multi_reg_push (1 << IP_REGNUM);
15649 /* Do not set RTX_FRAME_RELATED_P on this insn.
15650 The dwarf stack unwinding code only wants to see one
15651 stack decrement per function, and this is not it. If
15652 this instruction is labeled as being part of the frame
15653 creation sequence then dwarf2out_frame_debug_expr will
15654 die when it encounters the assignment of IP to FP
15655 later on, since the use of SP here establishes SP as
15656 the CFA register and not IP.
15658 Anyway this instruction is not really part of the stack
15659 frame creation although it is part of the prologue. */
15661 else if (IS_NESTED (func_type))
15663 /* The Static chain register is the same as the IP register
15664 used as a scratch register during stack frame creation.
15665 To get around this we need to find somewhere to store IP
15666 whilst the frame is being created.  We try the following
15667    places in order:
15669 1. The last argument register.
15670 2. A slot on the stack above the frame. (This only
15671 works if the function is not a varargs function).
15672 3. Register r3, after pushing the argument registers
15675 Note - we only need to tell the dwarf2 backend about the SP
15676 adjustment in the second variant; the static chain register
15677 doesn't need to be unwound, as it doesn't contain a value
15678 inherited from the caller. */
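/* Editorial sketch (not from the original source): the three variants
   above correspond roughly to the following instruction sequences:

     1:  mov r3, ip
     2:  str ip, [sp, #-4]!
     3:  <push argument registers> ... mov r3, ip

   Only variant 2 moves SP, which is why it alone needs the dwarf
   REG_FRAME_RELATED_EXPR note emitted below.  */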
15680 if (df_regs_ever_live_p (3) == false)
15681 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15682 else if (args_to_push == 0)
15686 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15689 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15690 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15693 /* Just tell the dwarf backend that we adjusted SP. */
15694 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15695 plus_constant (stack_pointer_rtx,
15696 -4));
15697 RTX_FRAME_RELATED_P (insn) = 1;
15698 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15702 /* Store the args on the stack. */
15703 if (cfun->machine->uses_anonymous_args)
15704 insn = emit_multi_reg_push
15705 ((0xf0 >> (args_to_push / 4)) & 0xf);
15706 else
15707 insn = emit_insn
15708 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15709 GEN_INT (- args_to_push)));
15711 RTX_FRAME_RELATED_P (insn) = 1;
15713 saved_pretend_args = 1;
15714 fp_offset = args_to_push;
15717 /* Now reuse r3 to preserve IP. */
15718 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15722 insn = emit_set_insn (ip_rtx,
15723 plus_constant (stack_pointer_rtx, fp_offset));
15724 RTX_FRAME_RELATED_P (insn) = 1;
15729 /* Push the argument registers, or reserve space for them. */
15730 if (cfun->machine->uses_anonymous_args)
15731 insn = emit_multi_reg_push
15732 ((0xf0 >> (args_to_push / 4)) & 0xf);
15733 else
15734 insn = emit_insn
15735 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15736 GEN_INT (- args_to_push)));
15737 RTX_FRAME_RELATED_P (insn) = 1;
15740 /* If this is an interrupt service routine, and the link register
15741 is going to be pushed, and we're not generating an extra
15742 push of IP (needed when a frame is created under the APCS frame
15743 layout), then subtracting four from LR now will mean that the
15744 function return can be done with a single instruction. */
15745 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15746 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15747 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15748 && TARGET_ARM)
15750 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15752 emit_set_insn (lr, plus_constant (lr, -4));
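      /* Illustrative consequence (editorial note): with LR already
	 adjusted, the epilogue can return with one instruction, e.g.

	   ldmfd sp!, {..., pc}^

	 instead of restoring LR and then issuing SUBS PC, LR, #4.  */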
15755 if (live_regs_mask)
15757 saved_regs += bit_count (live_regs_mask) * 4;
15758 if (optimize_size && !frame_pointer_needed
15759 && saved_regs == offsets->saved_regs - offsets->saved_args)
15761 /* If no coprocessor registers are being pushed and we don't have
15762 to worry about a frame pointer then push extra registers to
15763 create the stack frame. This is done in a way that does not
15764 alter the frame layout, so is independent of the epilogue. */
15768 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15770 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15771 if (frame && n * 4 >= frame)
15773 n = frame / 4;
15774 live_regs_mask |= (1 << n) - 1;
15775 saved_regs += frame;
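	  /* Worked example (editorial, assuming r4 and lr live and an
	     8-byte frame): instead of

	       push {r4, lr}
	       sub  sp, sp, #8

	     we emit the single store-multiple

	       push {r0, r1, r4, lr}

	     whose two extra words double as the frame.  */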
15778 insn = emit_multi_reg_push (live_regs_mask);
15779 RTX_FRAME_RELATED_P (insn) = 1;
15782 if (! IS_VOLATILE (func_type))
15783 saved_regs += arm_save_coproc_regs ();
15785 if (frame_pointer_needed && TARGET_ARM)
15787 /* Create the new frame pointer. */
15788 if (TARGET_APCS_FRAME)
15790 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15791 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15792 RTX_FRAME_RELATED_P (insn) = 1;
15794 if (IS_NESTED (func_type))
15796 /* Recover the static chain register. */
15797 if (!df_regs_ever_live_p (3)
15798 || saved_pretend_args)
15799 insn = gen_rtx_REG (SImode, 3);
15800 else /* if (crtl->args.pretend_args_size == 0) */
15802 insn = plus_constant (hard_frame_pointer_rtx, 4);
15803 insn = gen_frame_mem (SImode, insn);
15805 emit_set_insn (ip_rtx, insn);
15806 /* Add a USE to stop propagate_one_insn() from barfing. */
15807 emit_insn (gen_prologue_use (ip_rtx));
15812 insn = GEN_INT (saved_regs - 4);
15813 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15814 stack_pointer_rtx, insn));
15815 RTX_FRAME_RELATED_P (insn) = 1;
15819 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15821 /* This add can produce multiple insns for a large constant, so we
15822 need to get tricky. */
15823 rtx last = get_last_insn ();
15825 amount = GEN_INT (offsets->saved_args + saved_regs
15826 - offsets->outgoing_args);
15828 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15829 amount));
15830 do
15832 last = last ? NEXT_INSN (last) : get_insns ();
15833 RTX_FRAME_RELATED_P (last) = 1;
15835 while (last != insn);
15837 /* If the frame pointer is needed, emit a special barrier that
15838 will prevent the scheduler from moving stores to the frame
15839 before the stack adjustment. */
15840 if (frame_pointer_needed)
15841 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15842 hard_frame_pointer_rtx));
15846 if (frame_pointer_needed && TARGET_THUMB2)
15847 thumb_set_frame_pointer (offsets);
15849 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15851 unsigned long mask;
15853 mask = live_regs_mask;
15854 mask &= THUMB2_WORK_REGS;
15855 if (!IS_NESTED (func_type))
15856 mask |= (1 << IP_REGNUM);
15857 arm_load_pic_register (mask);
15860 /* If we are profiling, make sure no instructions are scheduled before
15861 the call to mcount. Similarly if the user has requested no
15862 scheduling in the prolog. Similarly if we want non-call exceptions
15863 using the EABI unwinder, to prevent faulting instructions from being
15864 swapped with a stack adjustment. */
15865 if (crtl->profile || !TARGET_SCHED_PROLOG
15866 || (arm_except_unwind_info (&global_options) == UI_TARGET
15867 && cfun->can_throw_non_call_exceptions))
15868 emit_insn (gen_blockage ());
15870 /* If the link register is being kept alive, with the return address in it,
15871 then make sure that it does not get reused by the ce2 pass. */
15872 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15873 cfun->machine->lr_save_eliminated = 1;
15876 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15877 static void
15878 arm_print_condition (FILE *stream)
15880 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15882 /* Branch conversion is not implemented for Thumb-2. */
15885 output_operand_lossage ("predicated Thumb instruction");
15888 if (current_insn_predicate != NULL)
15890 output_operand_lossage
15891 ("predicated instruction in conditional sequence");
15895 fputs (arm_condition_codes[arm_current_cc], stream);
15897 else if (current_insn_predicate)
15899 enum arm_cond_code code;
15903 output_operand_lossage ("predicated Thumb instruction");
15907 code = get_arm_condition_code (current_insn_predicate);
15908 fputs (arm_condition_codes[code], stream);
15913 /* If CODE is 'd', then the X is a condition operand and the instruction
15914 should only be executed if the condition is true.
15915 if CODE is 'D', then the X is a condition operand and the instruction
15916 should only be executed if the condition is false: however, if the mode
15917 of the comparison is CCFPEmode, then always execute the instruction -- we
15918 do this because in these circumstances !GE does not necessarily imply LT;
15919 in these cases the instruction pattern will take care to make sure that
15920 an instruction containing %d will follow, thereby undoing the effects of
15921 doing this instruction unconditionally.
15922 If CODE is 'N' then X is a floating point operand that must be negated
15923 before output.
15924 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15925 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
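/* Illustrative examples of the codes above (editorial additions):
   '%B' applied to (const_int 5) prints -6, the bitwise inverse;
   '%M' applied to r4 in DImode prints "{r4-r5}".  */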
15926 static void
15927 arm_print_operand (FILE *stream, rtx x, int code)
15932 fputs (ASM_COMMENT_START, stream);
15936 fputs (user_label_prefix, stream);
15940 fputs (REGISTER_PREFIX, stream);
15944 arm_print_condition (stream);
15948 /* Nothing in unified syntax, otherwise the current condition code. */
15949 if (!TARGET_UNIFIED_ASM)
15950 arm_print_condition (stream);
15954 /* The current condition code in unified syntax, otherwise nothing. */
15955 if (TARGET_UNIFIED_ASM)
15956 arm_print_condition (stream);
15960 /* The current condition code for a condition code setting instruction.
15961 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15962 if (TARGET_UNIFIED_ASM)
15964 fputc ('s', stream);
15965 arm_print_condition (stream);
15969 arm_print_condition (stream);
15970 fputc ('s', stream);
15975 /* If the instruction is conditionally executed then print
15976 the current condition code, otherwise print 's'. */
15977 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15978 if (current_insn_predicate)
15979 arm_print_condition (stream);
15981 fputc ('s', stream);
15984 /* %# is a "break" sequence. It doesn't output anything, but is used to
15985 separate e.g. operand numbers from following text, if that text consists
15986 of further digits which we don't want to be part of the operand
15987 number. */
15994 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15995 r = real_value_negate (&r);
15996 fprintf (stream, "%s", fp_const_from_val (&r));
16000 /* An integer or symbol address without a preceding # sign. */
16002 switch (GET_CODE (x))
16005 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16009 output_addr_const (stream, x);
16013 gcc_unreachable ();
16018 if (GET_CODE (x) == CONST_INT)
16021 val = ARM_SIGN_EXTEND (~INTVAL (x));
16022 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16026 putc ('~', stream);
16027 output_addr_const (stream, x);
16032 /* The low 16 bits of an immediate constant. */
16033 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
16037 fprintf (stream, "%s", arithmetic_instr (x, 1));
16040 /* Truncate Cirrus shift counts. */
16042 if (GET_CODE (x) == CONST_INT)
16044 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16047 arm_print_operand (stream, x, 0);
16051 fprintf (stream, "%s", arithmetic_instr (x, 0));
16059 if (!shift_operator (x, SImode))
16061 output_operand_lossage ("invalid shift operand");
16065 shift = shift_op (x, &val);
16069 fprintf (stream, ", %s ", shift);
16071 arm_print_operand (stream, XEXP (x, 1), 0);
16073 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16078 /* An explanation of the 'Q', 'R' and 'H' register operands:
16080 In a pair of registers containing a DI or DF value the 'Q'
16081 operand returns the register number of the register containing
16082 the least significant part of the value. The 'R' operand returns
16083 the register number of the register containing the most
16084 significant part of the value.
16086 The 'H' operand returns the higher of the two register numbers.
16087 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16088 same as the 'Q' operand, since the most significant part of the
16089 value is held in the lower number register. The reverse is true
16090 on systems where WORDS_BIG_ENDIAN is false.
16092 The purpose of these operands is to distinguish between cases
16093 where the endian-ness of the values is important (for example
16094 when they are added together), and cases where the endian-ness
16095 is irrelevant, but the order of register operations is important.
16096 For example when loading a value from memory into a register
16097 pair, the endian-ness does not matter. Provided that the value
16098 from the lower memory address is put into the lower numbered
16099 register, and the value from the higher address is put into the
16100 higher numbered register, the load will work regardless of whether
16101 the value being loaded is big-wordian or little-wordian. The
16102 order of the two register loads can matter however, if the address
16103 of the memory location is actually held in one of the registers
16104 being overwritten by the load.
16106 The 'Q' and 'R' constraints are also available for 64-bit
16107 constants. */
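  /* Worked example (an editorial illustration): on a little-endian
     target a DImode value in {r0, r1} keeps its least significant word
     in r0, so '%Q' prints r0 while '%R' and '%H' both print r1.  With
     WORDS_BIG_ENDIAN, '%Q' prints r1, '%R' prints r0 and '%H' is
     still r1.  */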
16109 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16111 rtx part = gen_lowpart (SImode, x);
16112 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16116 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16118 output_operand_lossage ("invalid operand for code '%c'", code);
16122 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16126 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16128 enum machine_mode mode = GET_MODE (x);
16131 if (mode == VOIDmode)
16132 mode = DImode;
16133 part = gen_highpart_mode (SImode, mode, x);
16134 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16138 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16140 output_operand_lossage ("invalid operand for code '%c'", code);
16144 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16148 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16150 output_operand_lossage ("invalid operand for code '%c'", code);
16154 asm_fprintf (stream, "%r", REGNO (x) + 1);
16158 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16160 output_operand_lossage ("invalid operand for code '%c'", code);
16164 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16168 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16170 output_operand_lossage ("invalid operand for code '%c'", code);
16174 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16178 asm_fprintf (stream, "%r",
16179 GET_CODE (XEXP (x, 0)) == REG
16180 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16184 asm_fprintf (stream, "{%r-%r}",
16185 REGNO (x),
16186 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16189 /* Like 'M', but writing doubleword vector registers, for use by Neon
16193 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16194 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16195 if (numregs == 1)
16196 asm_fprintf (stream, "{d%d}", regno);
16197 else
16198 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16203 /* CONST_TRUE_RTX means always -- that's the default. */
16204 if (x == const_true_rtx)
16207 if (!COMPARISON_P (x))
16209 output_operand_lossage ("invalid operand for code '%c'", code);
16213 fputs (arm_condition_codes[get_arm_condition_code (x)],
16214 stream);
16218 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16219 want to do that. */
16220 if (x == const_true_rtx)
16222 output_operand_lossage ("instruction never executed");
16225 if (!COMPARISON_P (x))
16227 output_operand_lossage ("invalid operand for code '%c'", code);
16231 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16232 (get_arm_condition_code (x))],
16233 stream);
16236 /* Cirrus registers can be accessed in a variety of ways:
16237 single floating point (f)
16238 double floating point (d)
16240 64bit integer (dx). */
16241 case 'W': /* Cirrus register in F mode. */
16242 case 'X': /* Cirrus register in D mode. */
16243 case 'Y': /* Cirrus register in FX mode. */
16244 case 'Z': /* Cirrus register in DX mode. */
16245 gcc_assert (GET_CODE (x) == REG
16246 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16248 fprintf (stream, "mv%s%s",
16249 code == 'W' ? "f"
16250 : code == 'X' ? "d"
16251 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16255 /* Print cirrus register in the mode specified by the register's mode. */
16258 int mode = GET_MODE (x);
16260 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16262 output_operand_lossage ("invalid operand for code '%c'", code);
16266 fprintf (stream, "mv%s%s",
16267 mode == DFmode ? "d"
16268 : mode == SImode ? "fx"
16269 : mode == DImode ? "dx"
16270 : "f", reg_names[REGNO (x)] + 2);
16276 if (GET_CODE (x) != REG
16277 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16278 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16279 /* Bad value for wCG register number. */
16281 output_operand_lossage ("invalid operand for code '%c'", code);
16286 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16289 /* Print an iWMMXt control register name. */
16291 if (GET_CODE (x) != CONST_INT
16292 || INTVAL (x) < 0
16293 || INTVAL (x) >= 16)
16294 /* Bad value for wC register number. */
16296 output_operand_lossage ("invalid operand for code '%c'", code);
16302 static const char * wc_reg_names [16] =
16304 "wCID", "wCon", "wCSSF", "wCASF",
16305 "wC4", "wC5", "wC6", "wC7",
16306 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16307 "wC12", "wC13", "wC14", "wC15"
16310 fputs (wc_reg_names [INTVAL (x)], stream);
16314 /* Print the high single-precision register of a VFP double-precision
16318 int mode = GET_MODE (x);
16321 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16323 output_operand_lossage ("invalid operand for code '%c'", code);
16328 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16330 output_operand_lossage ("invalid operand for code '%c'", code);
16334 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16338 /* Print a VFP/Neon double precision or quad precision register name. */
16342 int mode = GET_MODE (x);
16343 int is_quad = (code == 'q');
16346 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16348 output_operand_lossage ("invalid operand for code '%c'", code);
16352 if (GET_CODE (x) != REG
16353 || !IS_VFP_REGNUM (REGNO (x)))
16355 output_operand_lossage ("invalid operand for code '%c'", code);
16360 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16361 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16363 output_operand_lossage ("invalid operand for code '%c'", code);
16367 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16368 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16372 /* These two codes print the low/high doubleword register of a Neon quad
16373 register, respectively. For pair-structure types, can also print
16374 low/high quadword registers. */
16378 int mode = GET_MODE (x);
16381 if ((GET_MODE_SIZE (mode) != 16
16382 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16384 output_operand_lossage ("invalid operand for code '%c'", code);
16389 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16391 output_operand_lossage ("invalid operand for code '%c'", code);
16395 if (GET_MODE_SIZE (mode) == 16)
16396 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16397 + (code == 'f' ? 1 : 0));
16399 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16400 + (code == 'f' ? 1 : 0));
16404 /* Print a VFPv3 floating-point constant, represented as an integer
16408 int index = vfp3_const_double_index (x);
16409 gcc_assert (index != -1);
16410 fprintf (stream, "%d", index);
16414 /* Print bits representing opcode features for Neon.
16416 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16417 and polynomials as unsigned.
16419 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16421 Bit 2 is 1 for rounding functions, 0 otherwise. */
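  /* For example (editorial note): INTVAL 3 (signed float) makes 'T',
     'F' and 't' all print 'f'; INTVAL 2 (unsigned polynomial) makes
     'T' print 'p' but 't' print 'u'; setting bit 2 as well (INTVAL 6)
     makes 'O' print "r".  */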
16423 /* Identify the type as 's', 'u', 'p' or 'f'. */
16426 HOST_WIDE_INT bits = INTVAL (x);
16427 fputc ("uspf"[bits & 3], stream);
16431 /* Likewise, but signed and unsigned integers are both 'i'. */
16434 HOST_WIDE_INT bits = INTVAL (x);
16435 fputc ("iipf"[bits & 3], stream);
16439 /* As for 'T', but emit 'u' instead of 'p'. */
16442 HOST_WIDE_INT bits = INTVAL (x);
16443 fputc ("usuf"[bits & 3], stream);
16447 /* Bit 2: rounding (vs none). */
16450 HOST_WIDE_INT bits = INTVAL (x);
16451 fputs ((bits & 4) != 0 ? "r" : "", stream);
16455 /* Memory operand for vld1/vst1 instruction. */
16459 bool postinc = FALSE;
16460 unsigned align, modesize, align_bits;
16462 gcc_assert (GET_CODE (x) == MEM);
16463 addr = XEXP (x, 0);
16464 if (GET_CODE (addr) == POST_INC)
16467 addr = XEXP (addr, 0);
16469 asm_fprintf (stream, "[%r", REGNO (addr));
16471 /* We know the alignment of this access, so we can emit a hint in the
16472 instruction (for some alignments) as an aid to the memory subsystem
16474 align = MEM_ALIGN (x) >> 3;
16475 modesize = GET_MODE_SIZE (GET_MODE (x));
16477 /* Only certain alignment specifiers are supported by the hardware. */
16478 if (modesize == 16 && (align % 32) == 0)
16479 align_bits = 256;
16480 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16481 align_bits = 128;
16482 else if ((align % 8) == 0)
16483 align_bits = 64;
16484 else
16485 align_bits = 0;
16487 if (align_bits != 0)
16488 asm_fprintf (stream, ":%d", align_bits);
16490 asm_fprintf (stream, "]");
16493 fputs ("!", stream);
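	/* Example output (editorial): a 16-byte access through a
	   32-byte-aligned POST_INC pointer in r0 is printed as
	   "[r0:256]!", yielding e.g.

	     vld1.64 {d16-d17}, [r0:256]!  */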
16501 gcc_assert (GET_CODE (x) == MEM);
16502 addr = XEXP (x, 0);
16503 gcc_assert (GET_CODE (addr) == REG);
16504 asm_fprintf (stream, "[%r]", REGNO (addr));
16508 /* Translate an S register number into a D register number and element index. */
16511 int mode = GET_MODE (x);
16514 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16516 output_operand_lossage ("invalid operand for code '%c'", code);
16521 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16523 output_operand_lossage ("invalid operand for code '%c'", code);
16527 regno = regno - FIRST_VFP_REGNUM;
16528 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16532 /* Register specifier for vld1.16/vst1.16. Translate the S register
16533 number into a D register number and element index. */
16536 int mode = GET_MODE (x);
16539 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16541 output_operand_lossage ("invalid operand for code '%c'", code);
16546 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16548 output_operand_lossage ("invalid operand for code '%c'", code);
16552 regno = regno - FIRST_VFP_REGNUM;
16553 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
16560 output_operand_lossage ("missing operand");
16564 switch (GET_CODE (x))
16567 asm_fprintf (stream, "%r", REGNO (x));
16571 output_memory_reference_mode = GET_MODE (x);
16572 output_address (XEXP (x, 0));
16579 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16580 sizeof (fpstr), 0, 1);
16581 fprintf (stream, "#%s", fpstr);
16584 fprintf (stream, "#%s", fp_immediate_constant (x));
16588 gcc_assert (GET_CODE (x) != NEG);
16589 fputc ('#', stream);
16590 if (GET_CODE (x) == HIGH)
16592 fputs (":lower16:", stream);
16596 output_addr_const (stream, x);
16602 /* Target hook for printing a memory address. */
16603 static void
16604 arm_print_operand_address (FILE *stream, rtx x)
16608 int is_minus = GET_CODE (x) == MINUS;
16610 if (GET_CODE (x) == REG)
16611 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16612 else if (GET_CODE (x) == PLUS || is_minus)
16614 rtx base = XEXP (x, 0);
16615 rtx index = XEXP (x, 1);
16616 HOST_WIDE_INT offset = 0;
16617 if (GET_CODE (base) != REG
16618 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16620 /* Ensure that BASE is a register. */
16621 /* (one of them must be). */
16622 /* Also ensure the SP is not used as an index register. */
16627 switch (GET_CODE (index))
16630 offset = INTVAL (index);
16631 if (is_minus)
16632 offset = -offset;
16633 asm_fprintf (stream, "[%r, #%wd]",
16634 REGNO (base), offset);
16638 asm_fprintf (stream, "[%r, %s%r]",
16639 REGNO (base), is_minus ? "-" : "",
16640 REGNO (index));
16649 asm_fprintf (stream, "[%r, %s%r",
16650 REGNO (base), is_minus ? "-" : "",
16651 REGNO (XEXP (index, 0)));
16652 arm_print_operand (stream, index, 'S');
16653 fputs ("]", stream);
16658 gcc_unreachable ();
16661 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16662 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16664 extern enum machine_mode output_memory_reference_mode;
16666 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16668 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16669 asm_fprintf (stream, "[%r, #%s%d]!",
16670 REGNO (XEXP (x, 0)),
16671 GET_CODE (x) == PRE_DEC ? "-" : "",
16672 GET_MODE_SIZE (output_memory_reference_mode));
16674 asm_fprintf (stream, "[%r], #%s%d",
16675 REGNO (XEXP (x, 0)),
16676 GET_CODE (x) == POST_DEC ? "-" : "",
16677 GET_MODE_SIZE (output_memory_reference_mode));
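      /* Examples (editorial): an SImode PRE_DEC through r3 prints
	 "[r3, #-4]!"; an SImode POST_INC prints "[r3], #4".  */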
16679 else if (GET_CODE (x) == PRE_MODIFY)
16681 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16682 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16683 asm_fprintf (stream, "#%wd]!",
16684 INTVAL (XEXP (XEXP (x, 1), 1)));
16686 asm_fprintf (stream, "%r]!",
16687 REGNO (XEXP (XEXP (x, 1), 1)));
16689 else if (GET_CODE (x) == POST_MODIFY)
16691 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16692 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16693 asm_fprintf (stream, "#%wd",
16694 INTVAL (XEXP (XEXP (x, 1), 1)));
16696 asm_fprintf (stream, "%r",
16697 REGNO (XEXP (XEXP (x, 1), 1)));
16699 else output_addr_const (stream, x);
16703 if (GET_CODE (x) == REG)
16704 asm_fprintf (stream, "[%r]", REGNO (x));
16705 else if (GET_CODE (x) == POST_INC)
16706 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16707 else if (GET_CODE (x) == PLUS)
16709 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16710 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16711 asm_fprintf (stream, "[%r, #%wd]",
16712 REGNO (XEXP (x, 0)),
16713 INTVAL (XEXP (x, 1)));
16715 asm_fprintf (stream, "[%r, %r]",
16716 REGNO (XEXP (x, 0)),
16717 REGNO (XEXP (x, 1)));
16720 output_addr_const (stream, x);
16724 /* Target hook for indicating whether a punctuation character for
16725 TARGET_PRINT_OPERAND is valid. */
16726 static bool
16727 arm_print_operand_punct_valid_p (unsigned char code)
16729 return (code == '@' || code == '|' || code == '.'
16730 || code == '(' || code == ')' || code == '#'
16731 || (TARGET_32BIT && (code == '?'))
16732 || (TARGET_THUMB2 && (code == '!'))
16733 || (TARGET_THUMB && (code == '_')));
16736 /* Target hook for assembling integer objects. The ARM version needs to
16737 handle word-sized values specially. */
16738 static bool
16739 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16741 enum machine_mode mode;
16743 if (size == UNITS_PER_WORD && aligned_p)
16745 fputs ("\t.word\t", asm_out_file);
16746 output_addr_const (asm_out_file, x);
16748 /* Mark symbols as position independent. We only do this in the
16749 .text segment, not in the .data segment. */
16750 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16751 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16753 /* See legitimize_pic_address for an explanation of the
16754 TARGET_VXWORKS_RTP check. */
16755 if (TARGET_VXWORKS_RTP
16756 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16757 fputs ("(GOT)", asm_out_file);
16759 fputs ("(GOTOFF)", asm_out_file);
16761 fputc ('\n', asm_out_file);
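	  /* So a PIC constant-table entry comes out as, e.g.
	     (editorial illustration)

	       .word	foo(GOT)	@ global symbol
	       .word	bar(GOTOFF)	@ local symbol  */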
16765 mode = GET_MODE (x);
16767 if (arm_vector_mode_supported_p (mode))
16771 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16773 units = CONST_VECTOR_NUNITS (x);
16774 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16776 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16777 for (i = 0; i < units; i++)
16779 rtx elt = CONST_VECTOR_ELT (x, i);
16780 assemble_integer
16781 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16784 for (i = 0; i < units; i++)
16786 rtx elt = CONST_VECTOR_ELT (x, i);
16787 REAL_VALUE_TYPE rval;
16789 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16791 assemble_real
16792 (rval, GET_MODE_INNER (mode),
16793 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16799 return default_assemble_integer (x, size, aligned_p);
16802 static void
16803 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16807 if (!TARGET_AAPCS_BASED)
16809 (is_ctor ?
16810 default_named_section_asm_out_constructor
16811 : default_named_section_asm_out_destructor) (symbol, priority);
16815 /* Put these in the .init_array section, using a special relocation. */
16816 if (priority != DEFAULT_INIT_PRIORITY)
16819 sprintf (buf, "%s.%.5u",
16820 is_ctor ? ".init_array" : ".fini_array",
16821 priority);
16822 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16829 switch_to_section (s);
16830 assemble_align (POINTER_SIZE);
16831 fputs ("\t.word\t", asm_out_file);
16832 output_addr_const (asm_out_file, symbol);
16833 fputs ("(target1)\n", asm_out_file);
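  /* E.g. (editorial illustration, assuming the priority is formatted
     directly by the "%.5u" above): a constructor with priority 123 is
     placed in section ".init_array.00123" as

       .word	<symbol>(target1)  */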
16836 /* Add a function to the list of static constructors. */
16838 static void
16839 arm_elf_asm_constructor (rtx symbol, int priority)
16841 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16844 /* Add a function to the list of static destructors. */
16846 static void
16847 arm_elf_asm_destructor (rtx symbol, int priority)
16849 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16852 /* A finite state machine takes care of noticing whether or not instructions
16853 can be conditionally executed, and thus decrease execution time and code
16854 size by deleting branch instructions. The fsm is controlled by
16855 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16857 /* The states of the fsm controlling condition codes are:
16858 0: normal, do nothing special
16859 1: make ASM_OUTPUT_OPCODE not output this instruction
16860 2: make ASM_OUTPUT_OPCODE not output this instruction
16861 3: make instructions conditional
16862 4: make instructions conditional
16864 State transitions (state->state by whom under condition):
16865 0 -> 1 final_prescan_insn if the `target' is a label
16866 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16867 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16868 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16869 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16870 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16871 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16872 (the target insn is arm_target_insn).
16874 If the jump clobbers the conditions then we use states 2 and 4.
16876 A similar thing can be done with conditional return insns.
16878 XXX In case the `target' is an unconditional branch, this conditionalising
16879 of the instructions always reduces code size, but not always execution
16880 time. But then, I want to reduce the code size to somewhere near what
16881 /bin/cc produces. */
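/* As an editorial illustration, the transformation this fsm enables is:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   becoming the branchless

	cmp	r0, #0
	addne	r1, r1, #1  */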
16883 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16884 instructions. When a COND_EXEC instruction is seen the subsequent
16885 instructions are scanned so that multiple conditional instructions can be
16886 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16887 specify the length and true/false mask for the IT block. These will be
16888 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
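/* For instance (editorial note), two conditional insns with opposite
   senses of the same condition can share one IT block:

	ite	eq
	moveq	r0, #1
	movne	r0, #0  */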
16890 /* Returns the index of the ARM condition code string in
16891 `arm_condition_codes'. COMPARISON should be an rtx like
16892 `(eq (...) (...))'. */
16893 static enum arm_cond_code
16894 get_arm_condition_code (rtx comparison)
16896 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16897 enum arm_cond_code code;
16898 enum rtx_code comp_code = GET_CODE (comparison);
16900 if (GET_MODE_CLASS (mode) != MODE_CC)
16901 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16902 XEXP (comparison, 1));
16906 case CC_DNEmode: code = ARM_NE; goto dominance;
16907 case CC_DEQmode: code = ARM_EQ; goto dominance;
16908 case CC_DGEmode: code = ARM_GE; goto dominance;
16909 case CC_DGTmode: code = ARM_GT; goto dominance;
16910 case CC_DLEmode: code = ARM_LE; goto dominance;
16911 case CC_DLTmode: code = ARM_LT; goto dominance;
16912 case CC_DGEUmode: code = ARM_CS; goto dominance;
16913 case CC_DGTUmode: code = ARM_HI; goto dominance;
16914 case CC_DLEUmode: code = ARM_LS; goto dominance;
16915 case CC_DLTUmode: code = ARM_CC;
16917 dominance:
16918 gcc_assert (comp_code == EQ || comp_code == NE);
16920 if (comp_code == EQ)
16921 return ARM_INVERSE_CONDITION_CODE (code);
16927 case NE: return ARM_NE;
16928 case EQ: return ARM_EQ;
16929 case GE: return ARM_PL;
16930 case LT: return ARM_MI;
16931 default: gcc_unreachable ();
16937 case NE: return ARM_NE;
16938 case EQ: return ARM_EQ;
16939 default: gcc_unreachable ();
16945 case NE: return ARM_MI;
16946 case EQ: return ARM_PL;
16947 default: gcc_unreachable ();
16952 /* These encodings assume that AC=1 in the FPA system control
16953 byte. This allows us to handle all cases except UNEQ and
16954 LTGT. */
16957 case GE: return ARM_GE;
16958 case GT: return ARM_GT;
16959 case LE: return ARM_LS;
16960 case LT: return ARM_MI;
16961 case NE: return ARM_NE;
16962 case EQ: return ARM_EQ;
16963 case ORDERED: return ARM_VC;
16964 case UNORDERED: return ARM_VS;
16965 case UNLT: return ARM_LT;
16966 case UNLE: return ARM_LE;
16967 case UNGT: return ARM_HI;
16968 case UNGE: return ARM_PL;
16969 /* UNEQ and LTGT do not have a representation. */
16970 case UNEQ: /* Fall through. */
16971 case LTGT: /* Fall through. */
16972 default: gcc_unreachable ();
16978 case NE: return ARM_NE;
16979 case EQ: return ARM_EQ;
16980 case GE: return ARM_LE;
16981 case GT: return ARM_LT;
16982 case LE: return ARM_GE;
16983 case LT: return ARM_GT;
16984 case GEU: return ARM_LS;
16985 case GTU: return ARM_CC;
16986 case LEU: return ARM_CS;
16987 case LTU: return ARM_HI;
16988 default: gcc_unreachable ();
16994 case LTU: return ARM_CS;
16995 case GEU: return ARM_CC;
16996 default: gcc_unreachable ();
17002 case NE: return ARM_NE;
17003 case EQ: return ARM_EQ;
17004 case GEU: return ARM_CS;
17005 case GTU: return ARM_HI;
17006 case LEU: return ARM_LS;
17007 case LTU: return ARM_CC;
17008 default: gcc_unreachable ();
17014 case GE: return ARM_GE;
17015 case LT: return ARM_LT;
17016 case GEU: return ARM_CS;
17017 case LTU: return ARM_CC;
17018 default: gcc_unreachable ();
17024 case NE: return ARM_NE;
17025 case EQ: return ARM_EQ;
17026 case GE: return ARM_GE;
17027 case GT: return ARM_GT;
17028 case LE: return ARM_LE;
17029 case LT: return ARM_LT;
17030 case GEU: return ARM_CS;
17031 case GTU: return ARM_HI;
17032 case LEU: return ARM_LS;
17033 case LTU: return ARM_CC;
17034 default: gcc_unreachable ();
17037 default: gcc_unreachable ();
17041 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17042 instructions. */
17043 void
17044 thumb2_final_prescan_insn (rtx insn)
17046 rtx first_insn = insn;
17047 rtx body = PATTERN (insn);
17049 enum arm_cond_code code;
17053 /* Remove the previous insn from the count of insns to be output. */
17054 if (arm_condexec_count)
17055 arm_condexec_count--;
17057 /* Nothing to do if we are already inside a conditional block. */
17058 if (arm_condexec_count)
17061 if (GET_CODE (body) != COND_EXEC)
17064 /* Conditional jumps are implemented directly. */
17065 if (GET_CODE (insn) == JUMP_INSN)
17068 predicate = COND_EXEC_TEST (body);
17069 arm_current_cc = get_arm_condition_code (predicate);
17071 n = get_attr_ce_count (insn);
17072 arm_condexec_count = 1;
17073 arm_condexec_mask = (1 << n) - 1;
17074 arm_condexec_masklen = n;
17075 /* See if subsequent instructions can be combined into the same block. */
17078 insn = next_nonnote_insn (insn);
17080 /* Jumping into the middle of an IT block is illegal, so a label or
17081 barrier terminates the block. */
17082 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
17085 body = PATTERN (insn);
17086 /* USE and CLOBBER aren't really insns, so just skip them. */
17087 if (GET_CODE (body) == USE
17088 || GET_CODE (body) == CLOBBER)
17091 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17092 if (GET_CODE (body) != COND_EXEC)
17094 /* Allow up to 4 conditionally executed instructions in a block. */
17095 n = get_attr_ce_count (insn);
17096 if (arm_condexec_masklen + n > 4)
17099 predicate = COND_EXEC_TEST (body);
17100 code = get_arm_condition_code (predicate);
17101 mask = (1 << n) - 1;
17102 if (arm_current_cc == code)
17103 arm_condexec_mask |= (mask << arm_condexec_masklen);
17104 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
17107 arm_condexec_count++;
17108 arm_condexec_masklen += n;
17110 /* A jump must be the last instruction in a conditional block. */
17111 if (GET_CODE (insn) == JUMP_INSN)
17114 /* Restore recog_data (getting the attributes of other insns can
17115 destroy this array, but final.c assumes that it remains intact
17116 across this call). */
17117 extract_constrain_insn_cached (first_insn);
17120 void
17121 arm_final_prescan_insn (rtx insn)
17123 /* BODY will hold the body of INSN. */
17124 rtx body = PATTERN (insn);
17126 /* This will be 1 if trying to repeat the trick, and things need to be
17127 reversed if it appears to fail. */
17128 int reverse = 0;
17130 /* If we start with a return insn, we only succeed if we find another one. */
17131 int seeking_return = 0;
17133 /* START_INSN will hold the insn from where we start looking. This is the
17134 first insn after the following code_label if REVERSE is true. */
17135 rtx start_insn = insn;
17137 /* If in state 4, check if the target branch is reached, in order to
17138 change back to state 0. */
17139 if (arm_ccfsm_state == 4)
17141 if (insn == arm_target_insn)
17143 arm_target_insn = NULL;
17144 arm_ccfsm_state = 0;
17149 /* If in state 3, it is possible to repeat the trick, if this insn is an
17150 unconditional branch to a label, and immediately following this branch
17151 is the previous target label which is only used once, and the label this
17152 branch jumps to is not too far off. */
17153 if (arm_ccfsm_state == 3)
17155 if (simplejump_p (insn))
17157 start_insn = next_nonnote_insn (start_insn);
17158 if (GET_CODE (start_insn) == BARRIER)
17160 /* XXX Isn't this always a barrier? */
17161 start_insn = next_nonnote_insn (start_insn);
17163 if (GET_CODE (start_insn) == CODE_LABEL
17164 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17165 && LABEL_NUSES (start_insn) == 1)
17166 reverse = TRUE;
17170 else if (GET_CODE (body) == RETURN)
17172 start_insn = next_nonnote_insn (start_insn);
17173 if (GET_CODE (start_insn) == BARRIER)
17174 start_insn = next_nonnote_insn (start_insn);
17175 if (GET_CODE (start_insn) == CODE_LABEL
17176 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17177 && LABEL_NUSES (start_insn) == 1)
17179 reverse = TRUE;
17180 seeking_return = 1;
17189 gcc_assert (!arm_ccfsm_state || reverse);
17190 if (GET_CODE (insn) != JUMP_INSN)
17193 /* This jump might be paralleled with a clobber of the condition codes;
17194 the jump should always come first. */
17195 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17196 body = XVECEXP (body, 0, 0);
17198 if (reverse
17199 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17200 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17203 int fail = FALSE, succeed = FALSE;
17204 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17205 int then_not_else = TRUE;
17206 rtx this_insn = start_insn, label = 0;
17208 /* Register the insn jumped to. */
17209 if (reverse)
17211 if (!seeking_return)
17212 label = XEXP (SET_SRC (body), 0);
17214 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17215 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17216 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17218 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17219 then_not_else = FALSE;
17221 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17222 seeking_return = 1;
17223 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17225 seeking_return = 1;
17226 then_not_else = FALSE;
17229 gcc_unreachable ();
17231 /* See how many insns this branch skips, and what kind of insns. If all
17232 insns are okay, and the label or unconditional branch to the same
17233 label is not too far away, succeed. */
17234 for (insns_skipped = 0;
17235 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17239 this_insn = next_nonnote_insn (this_insn);
17243 switch (GET_CODE (this_insn))
17246 /* Succeed if it is the target label, otherwise fail since
17247 control falls in from somewhere else. */
17248 if (this_insn == label)
17250 arm_ccfsm_state = 1;
17258 /* Succeed if the following insn is the target label.
17260 If return insns are used then the last insn in a function
17261 will be a barrier. */
17262 this_insn = next_nonnote_insn (this_insn);
17263 if (this_insn && this_insn == label)
17265 arm_ccfsm_state = 1;
17273 /* The AAPCS says that conditional calls should not be
17274 used since they make interworking inefficient (the
17275 linker can't transform BL<cond> into BLX). That's
17276 only a problem if the machine has BLX. */
17283 /* Succeed if the following insn is the target label, or
17284 if the following two insns are a barrier and the
17285 target label. */
17286 this_insn = next_nonnote_insn (this_insn);
17287 if (this_insn && GET_CODE (this_insn) == BARRIER)
17288 this_insn = next_nonnote_insn (this_insn);
17290 if (this_insn && this_insn == label
17291 && insns_skipped < max_insns_skipped)
17293 arm_ccfsm_state = 1;
17301 /* If this is an unconditional branch to the same label, succeed.
17302 If it is to another label, do nothing. If it is conditional,
17303 fail. */
17304 /* XXX Probably, the tests for SET and the PC are
17305 insufficient. */
17307 scanbody = PATTERN (this_insn);
17308 if (GET_CODE (scanbody) == SET
17309 && GET_CODE (SET_DEST (scanbody)) == PC)
17311 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17312 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17314 arm_ccfsm_state = 2;
17317 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17320 /* Fail if a conditional return is undesirable (e.g. on a
17321 StrongARM), but still allow this if optimizing for size. */
17322 else if (GET_CODE (scanbody) == RETURN
17323 && !use_return_insn (TRUE, NULL)
17324 && !optimize_size)
17325 fail = TRUE;
17326 else if (GET_CODE (scanbody) == RETURN
17327 && seeking_return)
17329 arm_ccfsm_state = 2;
17332 else if (GET_CODE (scanbody) == PARALLEL)
17334 switch (get_attr_conds (this_insn))
17344 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17349 /* Instructions using or affecting the condition codes make it
17350 fail. */
17351 scanbody = PATTERN (this_insn);
17352 if (!(GET_CODE (scanbody) == SET
17353 || GET_CODE (scanbody) == PARALLEL)
17354 || get_attr_conds (this_insn) != CONDS_NOCOND)
17357 /* A conditional cirrus instruction must be followed by
17358 a non-Cirrus instruction. However, since we
17359 conditionalize instructions in this function and by
17360 the time we get here we can't add instructions
17361 (nops), because shorten_branches() has already been
17362 called, we will disable conditionalizing Cirrus
17363 instructions to be safe. */
17364 if (GET_CODE (scanbody) != USE
17365 && GET_CODE (scanbody) != CLOBBER
17366 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17376 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17377 arm_target_label = CODE_LABEL_NUMBER (label);
17380 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17382 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17384 this_insn = next_nonnote_insn (this_insn);
17385 gcc_assert (!this_insn
17386 || (GET_CODE (this_insn) != BARRIER
17387 && GET_CODE (this_insn) != CODE_LABEL));
17391 /* Oh, dear! We ran off the end... give up. */
17392 extract_constrain_insn_cached (insn);
17393 arm_ccfsm_state = 0;
17394 arm_target_insn = NULL;
17397 arm_target_insn = this_insn;
17400 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17401 what it was. */
17403 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17405 if (reverse || then_not_else)
17406 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17409 /* Restore recog_data (getting the attributes of other insns can
17410 destroy this array, but final.c assumes that it remains intact
17411 across this call). */
17412 extract_constrain_insn_cached (insn);
17416 /* Output IT instructions. */
17417 void
17418 thumb2_asm_output_opcode (FILE * stream)
17423 if (arm_condexec_mask)
17425 for (n = 0; n < arm_condexec_masklen; n++)
17426 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17428 asm_fprintf (stream, "i%s\t%s\n\t", buff,
17429 arm_condition_codes[arm_current_cc]);
17430 arm_condexec_mask = 0;
17434 /* Returns true if REGNO is a valid register
17435 for holding a quantity of type MODE. */
17436 int
17437 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17439 if (GET_MODE_CLASS (mode) == MODE_CC)
17440 return (regno == CC_REGNUM
17441 || (TARGET_HARD_FLOAT && TARGET_VFP
17442 && regno == VFPCC_REGNUM));
17444 if (TARGET_THUMB1)
17445 /* For the Thumb we only allow values bigger than SImode in
17446 registers 0 - 6, so that there is always a second low
17447 register available to hold the upper part of the value.
17448 We probably ought to ensure that the register is the
17449 start of an even numbered register pair. */
17450 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17452 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17453 && IS_CIRRUS_REGNUM (regno))
17454 /* We have outlawed SI values in Cirrus registers because they
17455 reside in the lower 32 bits, but SF values reside in the
17456 upper 32 bits. This causes gcc all sorts of grief. We can't
17457 even split the registers into pairs because Cirrus SI values
17458 get sign extended to 64 bits -- aldyh. */
17459 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17461 if (TARGET_HARD_FLOAT && TARGET_VFP
17462 && IS_VFP_REGNUM (regno))
17464 if (mode == SFmode || mode == SImode)
17465 return VFP_REGNO_OK_FOR_SINGLE (regno);
17467 if (mode == DFmode)
17468 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17470 /* VFP registers can hold HFmode values, but there is no point in
17471 putting them there unless we have hardware conversion insns. */
17472 if (mode == HFmode)
17473 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17475 if (TARGET_NEON)
17476 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17477 || (VALID_NEON_QREG_MODE (mode)
17478 && NEON_REGNO_OK_FOR_QUAD (regno))
17479 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17480 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17481 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17482 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17483 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17488 if (TARGET_REALLY_IWMMXT)
17490 if (IS_IWMMXT_GR_REGNUM (regno))
17491 return mode == SImode;
17493 if (IS_IWMMXT_REGNUM (regno))
17494 return VALID_IWMMXT_REG_MODE (mode);
17497 /* We allow almost any value to be stored in the general registers.
17498 Restrict doubleword quantities to even register pairs so that we can
17499 use ldrd. Do not allow very large Neon structure opaque modes in
17500 general registers; they would use too many. */
17501 if (regno <= LAST_ARM_REGNUM)
17502 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17503 && ARM_NUM_REGS (mode) <= 4;
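  /* E.g. (editorial note): when TARGET_LDRD, a DImode value may live in
     r4 (even, pairing with r5 for ldrd/strd) but not start in r5.  */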
17505 if (regno == FRAME_POINTER_REGNUM
17506 || regno == ARG_POINTER_REGNUM)
17507 /* We only allow integers in the fake hard registers. */
17508 return GET_MODE_CLASS (mode) == MODE_INT;
17510 /* The only registers left are the FPA registers
17511 which we only allow to hold FP values. */
17512 return (TARGET_HARD_FLOAT && TARGET_FPA
17513 && GET_MODE_CLASS (mode) == MODE_FLOAT
17514 && regno >= FIRST_FPA_REGNUM
17515 && regno <= LAST_FPA_REGNUM);
17518 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17519 not used in arm mode. */
17521 enum reg_class
17522 arm_regno_class (int regno)
17526 if (regno == STACK_POINTER_REGNUM)
17528 if (regno == CC_REGNUM)
17535 if (TARGET_THUMB2 && regno < 8)
17536 return LO_REGS;
17538 if ( regno <= LAST_ARM_REGNUM
17539 || regno == FRAME_POINTER_REGNUM
17540 || regno == ARG_POINTER_REGNUM)
17541 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17543 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17544 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17546 if (IS_CIRRUS_REGNUM (regno))
17547 return CIRRUS_REGS;
17549 if (IS_VFP_REGNUM (regno))
17551 if (regno <= D7_VFP_REGNUM)
17552 return VFP_D0_D7_REGS;
17553 else if (regno <= LAST_LO_VFP_REGNUM)
17554 return VFP_LO_REGS;
17556 return VFP_HI_REGS;
17559 if (IS_IWMMXT_REGNUM (regno))
17560 return IWMMXT_REGS;
17562 if (IS_IWMMXT_GR_REGNUM (regno))
17563 return IWMMXT_GR_REGS;
17568 /* Handle a special case when computing the offset
17569 of an argument from the frame pointer. */
17570 int
17571 arm_debugger_arg_offset (int value, rtx addr)
17575 /* We are only interested if dbxout_parms() failed to compute the offset. */
17576 if (value != 0)
17577 return 0;
17579 /* We can only cope with the case where the address is held in a register. */
17580 if (GET_CODE (addr) != REG)
17581 return 0;
17583 /* If we are using the frame pointer to point at the argument, then
17584 an offset of 0 is correct. */
17585 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17586 return 0;
17588 /* If we are using the stack pointer to point at the
17589 argument, then an offset of 0 is correct. */
17590 /* ??? Check this is consistent with thumb2 frame layout. */
17591 if ((TARGET_THUMB || !frame_pointer_needed)
17592 && REGNO (addr) == SP_REGNUM)
17593 return 0;
17595 /* Oh dear. The argument is pointed to by a register rather
17596 than being held in a register, or being stored at a known
17597 offset from the frame pointer. Since GDB only understands
17598 those two kinds of argument we must translate the address
17599 held in the register into an offset from the frame pointer.
17600 We do this by searching through the insns for the function
17601 looking to see where this register gets its value. If the
17602 register is initialized from the frame pointer plus an offset
17603 then we are in luck and we can continue, otherwise we give up.
17605 This code is exercised by producing debugging information
17606 for a function with arguments like this:
17608 double func (double a, double b, int c, double d) {return d;}
17610 Without this code the stab for parameter 'd' will be set to
17611 an offset of 0 from the frame pointer, rather than 8. */
17613 /* The if() statement says:
17615 If the insn is a normal instruction
17616 and if the insn is setting the value in a register
17617 and if the register being set is the register holding the address of the argument
17618 and if the address is computed by an addition
17619 that involves adding to a register
17620 which is the frame pointer
17625 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17627 if ( GET_CODE (insn) == INSN
17628 && GET_CODE (PATTERN (insn)) == SET
17629 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17630 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17631 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17632 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17633 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17636 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17645 warning (0, "unable to compute real location of stacked parameter");
17646 value = 8; /* XXX magic hack */
17652 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17655 if ((MASK) & insn_flags) \
17656 add_builtin_function ((NAME), (TYPE), (CODE), \
17657 BUILT_IN_MD, NULL, NULL_TREE); \
17661 struct builtin_description
17663 const unsigned int mask;
17664 const enum insn_code icode;
17665 const char * const name;
17666 const enum arm_builtins code;
17667 const enum rtx_code comparison;
17668 const unsigned int flag;
17671 static const struct builtin_description bdesc_2arg[] =
17673 #define IWMMXT_BUILTIN(code, string, builtin) \
17674 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17675 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17677 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17678 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17679 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17680 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17681 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17682 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17683 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17684 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17685 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17686 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17687 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17688 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17689 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17690 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17691 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17692 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17693 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17694 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17695 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17696 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17697 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17698 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17699 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17700 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17701 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17702 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17703 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17704 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17705 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17706 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17707 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17708 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17709 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17710 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17711 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17712 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17713 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17714 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17715 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17716 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17717 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17718 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17719 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17720 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17721 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17722 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17723 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17724 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17725 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17726 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17727 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17728 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17729 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17730 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17731 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17732 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17733 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17734 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17736 #define IWMMXT_BUILTIN2(code, builtin) \
17737 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17739 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17740 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17741 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17742 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17743 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17744 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17745 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17746 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17747 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17748 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17749 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17750 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17751 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17752 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17753 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17754 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17755 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17756 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17757 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17758 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17759 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17760 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17761 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17762 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17763 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17764 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17765 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17766 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17767 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17768 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17769 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17770 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
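/* Editorial note: each IWMMXT_BUILTIN line above expands to an
   initializer such as (for the first entry)

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying an insn pattern to the user-visible builtin name.  */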
17773 static const struct builtin_description bdesc_1arg[] =
17775 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17776 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17777 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17778 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17779 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17780 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17781 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17782 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17783 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17784 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17785 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17786 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17787 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17788 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17789 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17790 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17791 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17792 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17795 /* Set up all the iWMMXt builtins. This is
17796 not called if TARGET_IWMMXT is zero. */
17799 arm_init_iwmmxt_builtins (void)
17801 const struct builtin_description * d;
17803 tree endlink = void_list_node;
17805 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17806 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17807 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17809 tree int_ftype_int
17810 = build_function_type (integer_type_node,
17811 tree_cons (NULL_TREE, integer_type_node, endlink));
17812   tree v8qi_ftype_v8qi_v8qi_int
17813     = build_function_type (V8QI_type_node,
17814         tree_cons (NULL_TREE, V8QI_type_node,
17815           tree_cons (NULL_TREE, V8QI_type_node,
17816             tree_cons (NULL_TREE,
17817               integer_type_node,
17818               endlink))));
17819   tree v4hi_ftype_v4hi_int
17820     = build_function_type (V4HI_type_node,
17821         tree_cons (NULL_TREE, V4HI_type_node,
17822           tree_cons (NULL_TREE, integer_type_node,
17823             endlink)));
17824   tree v2si_ftype_v2si_int
17825     = build_function_type (V2SI_type_node,
17826         tree_cons (NULL_TREE, V2SI_type_node,
17827           tree_cons (NULL_TREE, integer_type_node,
17828             endlink)));
17829   tree v2si_ftype_di_di
17830     = build_function_type (V2SI_type_node,
17831         tree_cons (NULL_TREE, long_long_integer_type_node,
17832           tree_cons (NULL_TREE, long_long_integer_type_node,
17833             endlink)));
17834   tree di_ftype_di_int
17835     = build_function_type (long_long_integer_type_node,
17836         tree_cons (NULL_TREE, long_long_integer_type_node,
17837           tree_cons (NULL_TREE, integer_type_node,
17838             endlink)));
17839   tree di_ftype_di_int_int
17840     = build_function_type (long_long_integer_type_node,
17841         tree_cons (NULL_TREE, long_long_integer_type_node,
17842           tree_cons (NULL_TREE, integer_type_node,
17843             tree_cons (NULL_TREE,
17844               integer_type_node,
17845               endlink))));
17846   tree int_ftype_v8qi
17847     = build_function_type (integer_type_node,
17848         tree_cons (NULL_TREE, V8QI_type_node,
17849           endlink));
17850   tree int_ftype_v4hi
17851     = build_function_type (integer_type_node,
17852         tree_cons (NULL_TREE, V4HI_type_node,
17853           endlink));
17854   tree int_ftype_v2si
17855     = build_function_type (integer_type_node,
17856         tree_cons (NULL_TREE, V2SI_type_node,
17857           endlink));
17858   tree int_ftype_v8qi_int
17859     = build_function_type (integer_type_node,
17860         tree_cons (NULL_TREE, V8QI_type_node,
17861           tree_cons (NULL_TREE, integer_type_node,
17862             endlink)));
17863   tree int_ftype_v4hi_int
17864     = build_function_type (integer_type_node,
17865         tree_cons (NULL_TREE, V4HI_type_node,
17866           tree_cons (NULL_TREE, integer_type_node,
17867             endlink)));
17868   tree int_ftype_v2si_int
17869     = build_function_type (integer_type_node,
17870         tree_cons (NULL_TREE, V2SI_type_node,
17871           tree_cons (NULL_TREE, integer_type_node,
17872             endlink)));
17873   tree v8qi_ftype_v8qi_int_int
17874     = build_function_type (V8QI_type_node,
17875         tree_cons (NULL_TREE, V8QI_type_node,
17876           tree_cons (NULL_TREE, integer_type_node,
17877             tree_cons (NULL_TREE,
17878               integer_type_node,
17879               endlink))));
17880   tree v4hi_ftype_v4hi_int_int
17881     = build_function_type (V4HI_type_node,
17882         tree_cons (NULL_TREE, V4HI_type_node,
17883           tree_cons (NULL_TREE, integer_type_node,
17884             tree_cons (NULL_TREE,
17885               integer_type_node,
17886               endlink))));
17887   tree v2si_ftype_v2si_int_int
17888     = build_function_type (V2SI_type_node,
17889         tree_cons (NULL_TREE, V2SI_type_node,
17890           tree_cons (NULL_TREE, integer_type_node,
17891             tree_cons (NULL_TREE,
17892               integer_type_node,
17893               endlink))));
17894   /* Miscellaneous.  */
17895   tree v8qi_ftype_v4hi_v4hi
17896     = build_function_type (V8QI_type_node,
17897         tree_cons (NULL_TREE, V4HI_type_node,
17898           tree_cons (NULL_TREE, V4HI_type_node,
17899             endlink)));
17900   tree v4hi_ftype_v2si_v2si
17901     = build_function_type (V4HI_type_node,
17902         tree_cons (NULL_TREE, V2SI_type_node,
17903           tree_cons (NULL_TREE, V2SI_type_node,
17904             endlink)));
17905   tree v2si_ftype_v4hi_v4hi
17906     = build_function_type (V2SI_type_node,
17907         tree_cons (NULL_TREE, V4HI_type_node,
17908           tree_cons (NULL_TREE, V4HI_type_node,
17909             endlink)));
17910   tree v2si_ftype_v8qi_v8qi
17911     = build_function_type (V2SI_type_node,
17912         tree_cons (NULL_TREE, V8QI_type_node,
17913           tree_cons (NULL_TREE, V8QI_type_node,
17914             endlink)));
17915   tree v4hi_ftype_v4hi_di
17916     = build_function_type (V4HI_type_node,
17917         tree_cons (NULL_TREE, V4HI_type_node,
17918           tree_cons (NULL_TREE,
17919             long_long_integer_type_node,
17920             endlink)));
17921   tree v2si_ftype_v2si_di
17922     = build_function_type (V2SI_type_node,
17923         tree_cons (NULL_TREE, V2SI_type_node,
17924           tree_cons (NULL_TREE,
17925             long_long_integer_type_node,
17926             endlink)));
17927   tree void_ftype_int_int
17928     = build_function_type (void_type_node,
17929         tree_cons (NULL_TREE, integer_type_node,
17930           tree_cons (NULL_TREE, integer_type_node,
17931             endlink)));
17932   tree di_ftype_void
17933     = build_function_type (long_long_unsigned_type_node, endlink);
17934   tree di_ftype_v8qi
17935     = build_function_type (long_long_integer_type_node,
17936         tree_cons (NULL_TREE, V8QI_type_node,
17937           endlink));
17938   tree di_ftype_v4hi
17939     = build_function_type (long_long_integer_type_node,
17940         tree_cons (NULL_TREE, V4HI_type_node,
17941           endlink));
17942   tree di_ftype_v2si
17943     = build_function_type (long_long_integer_type_node,
17944         tree_cons (NULL_TREE, V2SI_type_node,
17945           endlink));
17946   tree v2si_ftype_v4hi
17947     = build_function_type (V2SI_type_node,
17948         tree_cons (NULL_TREE, V4HI_type_node,
17949           endlink));
17950   tree v4hi_ftype_v8qi
17951     = build_function_type (V4HI_type_node,
17952         tree_cons (NULL_TREE, V8QI_type_node,
17953           endlink));
17954 
17955   tree di_ftype_di_v4hi_v4hi
17956     = build_function_type (long_long_unsigned_type_node,
17957         tree_cons (NULL_TREE,
17958           long_long_unsigned_type_node,
17959           tree_cons (NULL_TREE, V4HI_type_node,
17960             tree_cons (NULL_TREE,
17961               V4HI_type_node,
17962               endlink))));
17963 
17964   tree di_ftype_v4hi_v4hi
17965     = build_function_type (long_long_unsigned_type_node,
17966         tree_cons (NULL_TREE, V4HI_type_node,
17967           tree_cons (NULL_TREE, V4HI_type_node,
17968             endlink)));
17969 
17970   /* Normal vector binops.  */
17971   tree v8qi_ftype_v8qi_v8qi
17972     = build_function_type (V8QI_type_node,
17973         tree_cons (NULL_TREE, V8QI_type_node,
17974           tree_cons (NULL_TREE, V8QI_type_node,
17975             endlink)));
17976   tree v4hi_ftype_v4hi_v4hi
17977     = build_function_type (V4HI_type_node,
17978         tree_cons (NULL_TREE, V4HI_type_node,
17979           tree_cons (NULL_TREE, V4HI_type_node,
17980             endlink)));
17981   tree v2si_ftype_v2si_v2si
17982     = build_function_type (V2SI_type_node,
17983         tree_cons (NULL_TREE, V2SI_type_node,
17984           tree_cons (NULL_TREE, V2SI_type_node,
17985             endlink)));
17986   tree di_ftype_di_di
17987     = build_function_type (long_long_unsigned_type_node,
17988         tree_cons (NULL_TREE, long_long_unsigned_type_node,
17989           tree_cons (NULL_TREE,
17990             long_long_unsigned_type_node,
17991             endlink)));
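/* Editorial note: each tree above is the equivalent of a C prototype
   spelled out as a tree_cons chain.  For example, v4hi_ftype_v4hi_int
   corresponds to

     V4HI f (V4HI, int);

   where the trailing `endlink' (void_list_node) marks the argument list
   as fixed rather than variadic.  */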
17993   /* Add all builtins that are more or less simple operations on two
17994      operands.  */
17995   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17996     {
17997       /* Use one of the operands; the target can have a different mode for
17998 	 mask-generating compares.  */
17999       enum machine_mode mode;
18000       tree type;
18001 
18002       if (d->name == 0)
18003 	continue;
18004 
18005       mode = insn_data[d->icode].operand[1].mode;
18006 
18007       switch (mode)
18008 	{
18009 	case V8QImode:
18010 	  type = v8qi_ftype_v8qi_v8qi;
18011 	  break;
18012 	case V4HImode:
18013 	  type = v4hi_ftype_v4hi_v4hi;
18014 	  break;
18015 	case V2SImode:
18016 	  type = v2si_ftype_v2si_v2si;
18017 	  break;
18018 	case DImode:
18019 	  type = di_ftype_di_di;
18020 	  break;
18022 	default:
18023 	  gcc_unreachable ();
18024 	}
18026       def_mbuiltin (d->mask, d->name, type, d->code);
18027     }
18029 /* Add the remaining MMX insns with somewhat more complicated types. */
18030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18032 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18034 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18049 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18051 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18053 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18055 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18057 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18059 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18060 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18062 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18065 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18066 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18067 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18069 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18070 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18071 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18072 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18073 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18074 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18075 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18076 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18077 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18079 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18080 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18081 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18083 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18084 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18085 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18087 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18088 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18089 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18090 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18091 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18092 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18094 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18095 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18096 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18097 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18098 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18099 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18100 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18101 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18102 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18103 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18104 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18105 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18107 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18108 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18109 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18110 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18112 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18113 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18114 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18115 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18116 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18117 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18118 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18119 }
18120 
18121 static void
18122 arm_init_tls_builtins (void)
18123 {
18124   tree ftype, decl;
18125 
18126   ftype = build_function_type (ptr_type_node, void_list_node);
18127   decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18128 			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18129 			       "__builtin_thread_pointer", NULL_TREE);
18130   TREE_NOTHROW (decl) = 1;
18131   TREE_READONLY (decl) = 1;
18132 }
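/* Editorial example: the declaration above is what makes

     void *tp = __builtin_thread_pointer ();

   available to user code; the ARM_BUILTIN_THREAD_POINTER code is
   recognized when the call is expanded (the expansion itself happens
   elsewhere in this file).  */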
18134 enum neon_builtin_type_bits {
18135   T_V8QI  = 0x0001,
18136   T_V4HI  = 0x0002,
18137   T_V2SI  = 0x0004,
18138   T_V2SF  = 0x0008,
18139   T_DI    = 0x0010,
18140   T_V16QI = 0x0020,
18141   T_V8HI  = 0x0040,
18142   T_V4SI  = 0x0080,
18143   T_V4SF  = 0x0100,
18144   T_V2DI  = 0x0200,
18145   T_TI    = 0x0400,
18146   T_EI    = 0x0800,
18147   T_OI    = 0x1000
18148 };
18150 #define v8qi_UP T_V8QI
18151 #define v4hi_UP T_V4HI
18152 #define v2si_UP T_V2SI
18153 #define v2sf_UP  T_V2SF
18154 #define di_UP    T_DI
18155 #define v16qi_UP T_V16QI
18156 #define v8hi_UP T_V8HI
18157 #define v4si_UP T_V4SI
18158 #define v4sf_UP T_V4SF
18159 #define v2di_UP  T_V2DI
18160 #define ti_UP    T_TI
18161 #define ei_UP    T_EI
18162 #define oi_UP    T_OI
18163 
18164 #define UP(X) X##_UP
18166 #define T_MAX 13
18167 typedef enum {
18168   NEON_BINOP,
18169   NEON_TERNOP,
18170   NEON_UNOP,
18171   NEON_GETLANE,
18172   NEON_SETLANE,
18173   NEON_CREATE,
18174   NEON_DUP,
18175   NEON_DUPLANE,
18176   NEON_COMBINE,
18177   NEON_SPLIT,
18178   NEON_LANEMUL,
18179   NEON_LANEMULL,
18180   NEON_LANEMULH,
18181   NEON_LANEMAC,
18182   NEON_SCALARMUL,
18183   NEON_SCALARMULL,
18184   NEON_SCALARMULH,
18185   NEON_SCALARMAC,
18186   NEON_CONVERT,
18187   NEON_FIXCONV,
18188   NEON_SELECT,
18189   NEON_RESULTPAIR,
18190   NEON_REINTERP,
18191   NEON_VTBL,
18192   NEON_VTBX,
18194   NEON_LOAD1,
18195   NEON_LOAD1LANE,
18196   NEON_STORE1,
18197   NEON_STORE1LANE,
18198   NEON_LOADSTRUCT,
18199   NEON_LOADSTRUCTLANE,
18200   NEON_STORESTRUCT,
18201   NEON_STORESTRUCTLANE,
18202   NEON_LOGICBINOP,
18203   NEON_SHIFTINSERT,
18204   NEON_SHIFTIMM,
18205   NEON_SHIFTACC
18206 } neon_itype;
18207 
18208 typedef struct {
18209   const char *name;
18210   const neon_itype itype;
18211   const int bits;
18212   const enum insn_code codes[T_MAX];
18213   const unsigned int num_vars;
18214   unsigned int base_fcode;
18215 } neon_builtin_datum;
18217 #define CF(N,X) CODE_FOR_neon_##N##X
18219 #define VAR1(T, N, A) \
18220 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18221 #define VAR2(T, N, A, B) \
18222 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18223 #define VAR3(T, N, A, B, C) \
18224 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18225 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18226 #define VAR4(T, N, A, B, C, D) \
18227 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18228 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18229 #define VAR5(T, N, A, B, C, D, E) \
18230 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18231 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18232 #define VAR6(T, N, A, B, C, D, E, F) \
18233 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18234 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18235 #define VAR7(T, N, A, B, C, D, E, F, G) \
18236   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18237   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18238     CF (N, G) }, 7, 0
18239 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18240   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18241     | UP (H), \
18242   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18243     CF (N, G), CF (N, H) }, 8, 0
18244 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18245 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18246 | UP (H) | UP (I), \
18247 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18248 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18249 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18250 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18251 | UP (H) | UP (I) | UP (J), \
18252 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18253 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
18255 /* The mode entries in the following table correspond to the "key" type of the
18256 instruction variant, i.e. equivalent to that which would be specified after
18257 the assembler mnemonic, which usually refers to the last vector operand.
18258    (Signed, unsigned and polynomial types are not distinguished here,
18259    however; they are all mapped onto the same mode for a given element
18259    size.)  The modes
18260 listed per instruction should be the same as those defined for that
18261 instruction's pattern in neon.md.
18262 WARNING: Variants should be listed in the same increasing order as
18263 neon_builtin_type_bits. */
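/* Illustrative expansion (editorial): a table row such as

     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }

   stands for

     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si },
       3, 0 },

   so one entry covers every element-size variant of an instruction.  */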
18265 static neon_builtin_datum neon_builtin_data[] =
18266 {
18267   { VAR10 (BINOP, vadd,
18268 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18269 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18270 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18271 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18272 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18273 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18274 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18275 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18276 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18277 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18278 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18279 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18280 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18281 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18282 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18283 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18284 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18285 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18286 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18287 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18288 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18289 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18290 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18291 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18292 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18293 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18294 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18295 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18296 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18297 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18298 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18299 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18300 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18301 { VAR10 (BINOP, vsub,
18302 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18303 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18304 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18305 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18306 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18307 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18308 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18309 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18310 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18311 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18312 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18313 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18314 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18315 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18316 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18317 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18318 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18319 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18320 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18321 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18322 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18323 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18324 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18325 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18326 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18327 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18328 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18329 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18330 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18331 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18332 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18333 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18334 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18335 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18336 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18337 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18338 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18339 /* FIXME: vget_lane supports more variants than this! */
18340 { VAR10 (GETLANE, vget_lane,
18341 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18342 { VAR10 (SETLANE, vset_lane,
18343 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18344 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18345 { VAR10 (DUP, vdup_n,
18346 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18347 { VAR10 (DUPLANE, vdup_lane,
18348 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18349 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18350 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18351 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18352 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18353 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18354 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18355 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18356 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18357 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18358 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18359 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18360 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18361 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18362 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18363 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18364 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18365 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18366 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18367 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18368 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18369 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18370 { VAR10 (BINOP, vext,
18371 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18372 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18373 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18374 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18375 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18376 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18377 { VAR10 (SELECT, vbsl,
18378 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18379 { VAR1 (VTBL, vtbl1, v8qi) },
18380 { VAR1 (VTBL, vtbl2, v8qi) },
18381 { VAR1 (VTBL, vtbl3, v8qi) },
18382 { VAR1 (VTBL, vtbl4, v8qi) },
18383 { VAR1 (VTBX, vtbx1, v8qi) },
18384 { VAR1 (VTBX, vtbx2, v8qi) },
18385 { VAR1 (VTBX, vtbx3, v8qi) },
18386 { VAR1 (VTBX, vtbx4, v8qi) },
18387 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18388 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18389 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18390 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18391 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18392 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18393 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18394 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18395 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18396 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18397 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18398 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18399 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18400 { VAR10 (LOAD1, vld1,
18401 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18402 { VAR10 (LOAD1LANE, vld1_lane,
18403 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18404 { VAR10 (LOAD1, vld1_dup,
18405 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18406 { VAR10 (STORE1, vst1,
18407 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18408 { VAR10 (STORE1LANE, vst1_lane,
18409 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18410 { VAR9 (LOADSTRUCT,
18411 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18412 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18413 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18414 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18415 { VAR9 (STORESTRUCT, vst2,
18416 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18417 { VAR7 (STORESTRUCTLANE, vst2_lane,
18418 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18419 { VAR9 (LOADSTRUCT,
18420 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18421 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18422 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18423 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18424 { VAR9 (STORESTRUCT, vst3,
18425 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18426 { VAR7 (STORESTRUCTLANE, vst3_lane,
18427 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18428 { VAR9 (LOADSTRUCT, vld4,
18429 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18430 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18431 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18432 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18433 { VAR9 (STORESTRUCT, vst4,
18434 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18435 { VAR7 (STORESTRUCTLANE, vst4_lane,
18436 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18437 { VAR10 (LOGICBINOP, vand,
18438 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18439 { VAR10 (LOGICBINOP, vorr,
18440 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18441 { VAR10 (BINOP, veor,
18442 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18443 { VAR10 (LOGICBINOP, vbic,
18444 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18445 { VAR10 (LOGICBINOP, vorn,
18446 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18447 };
18448 
18449 #undef CF
18450 #undef VAR1
18451 #undef VAR2
18452 #undef VAR3
18453 #undef VAR4
18454 #undef VAR5
18455 #undef VAR6
18456 #undef VAR7
18457 #undef VAR8
18458 #undef VAR9
18459 #undef VAR10
18460 
18461 static void
18462 arm_init_neon_builtins (void)
18463 {
18464   unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18466 tree neon_intQI_type_node;
18467 tree neon_intHI_type_node;
18468 tree neon_polyQI_type_node;
18469 tree neon_polyHI_type_node;
18470 tree neon_intSI_type_node;
18471 tree neon_intDI_type_node;
18472 tree neon_float_type_node;
18474 tree intQI_pointer_node;
18475 tree intHI_pointer_node;
18476 tree intSI_pointer_node;
18477 tree intDI_pointer_node;
18478 tree float_pointer_node;
18480 tree const_intQI_node;
18481 tree const_intHI_node;
18482 tree const_intSI_node;
18483 tree const_intDI_node;
18484 tree const_float_node;
18486 tree const_intQI_pointer_node;
18487 tree const_intHI_pointer_node;
18488 tree const_intSI_pointer_node;
18489 tree const_intDI_pointer_node;
18490 tree const_float_pointer_node;
18492 tree V8QI_type_node;
18493 tree V4HI_type_node;
18494 tree V2SI_type_node;
18495 tree V2SF_type_node;
18496 tree V16QI_type_node;
18497 tree V8HI_type_node;
18498 tree V4SI_type_node;
18499 tree V4SF_type_node;
18500 tree V2DI_type_node;
18502 tree intUQI_type_node;
18503 tree intUHI_type_node;
18504 tree intUSI_type_node;
18505 tree intUDI_type_node;
18507 tree intEI_type_node;
18508 tree intOI_type_node;
18509 tree intCI_type_node;
18510 tree intXI_type_node;
18512 tree V8QI_pointer_node;
18513 tree V4HI_pointer_node;
18514 tree V2SI_pointer_node;
18515 tree V2SF_pointer_node;
18516 tree V16QI_pointer_node;
18517 tree V8HI_pointer_node;
18518 tree V4SI_pointer_node;
18519 tree V4SF_pointer_node;
18520 tree V2DI_pointer_node;
18522 tree void_ftype_pv8qi_v8qi_v8qi;
18523 tree void_ftype_pv4hi_v4hi_v4hi;
18524 tree void_ftype_pv2si_v2si_v2si;
18525 tree void_ftype_pv2sf_v2sf_v2sf;
18526 tree void_ftype_pdi_di_di;
18527 tree void_ftype_pv16qi_v16qi_v16qi;
18528 tree void_ftype_pv8hi_v8hi_v8hi;
18529 tree void_ftype_pv4si_v4si_v4si;
18530 tree void_ftype_pv4sf_v4sf_v4sf;
18531 tree void_ftype_pv2di_v2di_v2di;
18533 tree reinterp_ftype_dreg[5][5];
18534 tree reinterp_ftype_qreg[5][5];
18535 tree dreg_types[5], qreg_types[5];
18537 /* Create distinguished type nodes for NEON vector element types,
18538 and pointers to values of such types, so we can detect them later. */
18539 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18540 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18541 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18542 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18543 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18544 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18545 neon_float_type_node = make_node (REAL_TYPE);
18546 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18547 layout_type (neon_float_type_node);
18549 /* Define typedefs which exactly correspond to the modes we are basing vector
18550 types on. If you change these names you'll need to change
18551 the table used by arm_mangle_type too. */
18552 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18553 "__builtin_neon_qi");
18554 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18555 "__builtin_neon_hi");
18556 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18557 "__builtin_neon_si");
18558 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18559 "__builtin_neon_sf");
18560 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18561 "__builtin_neon_di");
18562 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18563 "__builtin_neon_poly8");
18564 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18565 "__builtin_neon_poly16");
18567 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18568 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18569 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18570 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18571 float_pointer_node = build_pointer_type (neon_float_type_node);
18573 /* Next create constant-qualified versions of the above types. */
18574 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18576 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18578 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18580 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18582 const_float_node = build_qualified_type (neon_float_type_node,
18585 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18586 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18587 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18588 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18589 const_float_pointer_node = build_pointer_type (const_float_node);
18591 /* Now create vector types based on our NEON element types. */
18592 /* 64-bit vectors. */
18594 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18596 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18598 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18600 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18601 /* 128-bit vectors. */
18603 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18605 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18607 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18609 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18611 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18613 /* Unsigned integer types for various mode sizes. */
18614 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18615 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18616 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18617 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18619 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18620 "__builtin_neon_uqi");
18621 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18622 "__builtin_neon_uhi");
18623 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18624 "__builtin_neon_usi");
18625 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18626 "__builtin_neon_udi");
18628 /* Opaque integer types for structures of vectors. */
18629 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18630 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18631 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18632 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18634 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18635 "__builtin_neon_ti");
18636 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18637 "__builtin_neon_ei");
18638 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18639 "__builtin_neon_oi");
18640 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18641 "__builtin_neon_ci");
18642 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18643 "__builtin_neon_xi");
18645 /* Pointers to vector types. */
18646 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18647 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18648 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18649 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18650 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18651 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18652 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18653 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18654 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18656 /* Operations which return results as pairs. */
18657 void_ftype_pv8qi_v8qi_v8qi =
18658 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18659 V8QI_type_node, NULL);
18660 void_ftype_pv4hi_v4hi_v4hi =
18661 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18662 V4HI_type_node, NULL);
18663 void_ftype_pv2si_v2si_v2si =
18664 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18665 V2SI_type_node, NULL);
18666 void_ftype_pv2sf_v2sf_v2sf =
18667 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18668 V2SF_type_node, NULL);
18669 void_ftype_pdi_di_di =
18670 build_function_type_list (void_type_node, intDI_pointer_node,
18671 neon_intDI_type_node, neon_intDI_type_node, NULL);
18672 void_ftype_pv16qi_v16qi_v16qi =
18673 build_function_type_list (void_type_node, V16QI_pointer_node,
18674 V16QI_type_node, V16QI_type_node, NULL);
18675 void_ftype_pv8hi_v8hi_v8hi =
18676 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18677 V8HI_type_node, NULL);
18678 void_ftype_pv4si_v4si_v4si =
18679 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18680 V4SI_type_node, NULL);
18681 void_ftype_pv4sf_v4sf_v4sf =
18682 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18683 V4SF_type_node, NULL);
18684 void_ftype_pv2di_v2di_v2di =
18685 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18686 V2DI_type_node, NULL);
18688 dreg_types[0] = V8QI_type_node;
18689 dreg_types[1] = V4HI_type_node;
18690 dreg_types[2] = V2SI_type_node;
18691 dreg_types[3] = V2SF_type_node;
18692 dreg_types[4] = neon_intDI_type_node;
18694 qreg_types[0] = V16QI_type_node;
18695 qreg_types[1] = V8HI_type_node;
18696 qreg_types[2] = V4SI_type_node;
18697 qreg_types[3] = V4SF_type_node;
18698 qreg_types[4] = V2DI_type_node;
18700 for (i = 0; i < 5; i++)
18703 for (j = 0; j < 5; j++)
18705 reinterp_ftype_dreg[i][j]
18706 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18707 reinterp_ftype_qreg[i][j]
18708 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18712 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18714 neon_builtin_datum *d = &neon_builtin_data[i];
18715 unsigned int j, codeidx = 0;
18717 d->base_fcode = fcode;
18719 for (j = 0; j < T_MAX; j++)
18721 const char* const modenames[] = {
18722 "v8qi", "v4hi", "v2si", "v2sf", "di",
18723 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18727 enum insn_code icode;
18728 int is_load = 0, is_store = 0;
18730 if ((d->bits & (1 << j)) == 0)
18733 icode = d->codes[codeidx++];
18738 case NEON_LOAD1LANE:
18739 case NEON_LOADSTRUCT:
18740 case NEON_LOADSTRUCTLANE:
18742 /* Fall through. */
18744 case NEON_STORE1LANE:
18745 case NEON_STORESTRUCT:
18746 case NEON_STORESTRUCTLANE:
18749 /* Fall through. */
18752 case NEON_LOGICBINOP:
18753 case NEON_SHIFTINSERT:
18760 case NEON_SHIFTIMM:
18761 case NEON_SHIFTACC:
18767 case NEON_LANEMULL:
18768 case NEON_LANEMULH:
18770 case NEON_SCALARMUL:
18771 case NEON_SCALARMULL:
18772 case NEON_SCALARMULH:
18773 case NEON_SCALARMAC:
18779 tree return_type = void_type_node, args = void_list_node;
18781 /* Build a function type directly from the insn_data for this
18782 builtin. The build_function_type() function takes care of
18783 removing duplicates for us. */
18784 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18788 if (is_load && k == 1)
18790 /* Neon load patterns always have the memory operand
18791 (a SImode pointer) in the operand 1 position. We
18792 want a const pointer to the element type in that
18794 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18800 eltype = const_intQI_pointer_node;
18805 eltype = const_intHI_pointer_node;
18810 eltype = const_intSI_pointer_node;
18815 eltype = const_float_pointer_node;
18820 eltype = const_intDI_pointer_node;
18823 default: gcc_unreachable ();
18826 else if (is_store && k == 0)
18828 /* Similarly, Neon store patterns use operand 0 as
18829 the memory location to store to (a SImode pointer).
18830 Use a pointer to the element type of the store in
18832 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18838 eltype = intQI_pointer_node;
18843 eltype = intHI_pointer_node;
18848 eltype = intSI_pointer_node;
18853 eltype = float_pointer_node;
18858 eltype = intDI_pointer_node;
18861 default: gcc_unreachable ();
18866 switch (insn_data[icode].operand[k].mode)
18868 case VOIDmode: eltype = void_type_node; break;
18870 case QImode: eltype = neon_intQI_type_node; break;
18871 case HImode: eltype = neon_intHI_type_node; break;
18872 case SImode: eltype = neon_intSI_type_node; break;
18873 case SFmode: eltype = neon_float_type_node; break;
18874 case DImode: eltype = neon_intDI_type_node; break;
18875 case TImode: eltype = intTI_type_node; break;
18876 case EImode: eltype = intEI_type_node; break;
18877 case OImode: eltype = intOI_type_node; break;
18878 case CImode: eltype = intCI_type_node; break;
18879 case XImode: eltype = intXI_type_node; break;
18880 /* 64-bit vectors. */
18881 case V8QImode: eltype = V8QI_type_node; break;
18882 case V4HImode: eltype = V4HI_type_node; break;
18883 case V2SImode: eltype = V2SI_type_node; break;
18884 case V2SFmode: eltype = V2SF_type_node; break;
18885 /* 128-bit vectors. */
18886 case V16QImode: eltype = V16QI_type_node; break;
18887 case V8HImode: eltype = V8HI_type_node; break;
18888 case V4SImode: eltype = V4SI_type_node; break;
18889 case V4SFmode: eltype = V4SF_type_node; break;
18890 case V2DImode: eltype = V2DI_type_node; break;
18891 default: gcc_unreachable ();
18895 if (k == 0 && !is_store)
18896 return_type = eltype;
18898 args = tree_cons (NULL_TREE, eltype, args);
18901 ftype = build_function_type (return_type, args);
18902 	      }
18904 	      break;
18905 	    case NEON_RESULTPAIR:
18906 	      {
18907 		switch (insn_data[icode].operand[1].mode)
18908 		  {
18909 		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18910 		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18911 		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18912 		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18913 		  case DImode: ftype = void_ftype_pdi_di_di; break;
18914 		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18915 		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18916 		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18917 		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18918 		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18919 		  default: gcc_unreachable ();
18920 		  }
18921 	      }
18922 	      break;
18924 	    case NEON_REINTERP:
18925 	      {
18926 		/* We iterate over 5 doubleword types, then 5 quadword
18927 		   types.  */
18928 		int rhs = j % 5;
18929 		switch (insn_data[icode].operand[0].mode)
18930 		  {
18931 		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18932 		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18933 		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18934 		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18935 		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18936 		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18937 		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18938 		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18939 		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18940 		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18941 		  default: gcc_unreachable ();
18942 		  }
18943 	      }
18944 	      break;
18946 	    default:
18947 	      gcc_unreachable ();
18948 	    }
18950 	  gcc_assert (ftype != NULL);
18952 	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18954 	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18955 				NULL_TREE);
18956 	}
18957     }
18958 }
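/* Editorial note: the loop above manufactures one builtin per variant,
   named "__builtin_neon_<name><mode>"; the VAR3 (BINOP, vaddl, ...) entry,
   for instance, yields __builtin_neon_vaddlv8qi, __builtin_neon_vaddlv4hi
   and __builtin_neon_vaddlv2si, which the arm_neon.h intrinsics wrap.  */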
18960 static void
18961 arm_init_fp16_builtins (void)
18962 {
18963   tree fp16_type = make_node (REAL_TYPE);
18964   TYPE_PRECISION (fp16_type) = 16;
18965   layout_type (fp16_type);
18966   (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18967 }
18968 
18969 static void
18970 arm_init_builtins (void)
18971 {
18972   arm_init_tls_builtins ();
18974   if (TARGET_REALLY_IWMMXT)
18975     arm_init_iwmmxt_builtins ();
18977   if (TARGET_NEON)
18978     arm_init_neon_builtins ();
18980   if (arm_fp16_format)
18981     arm_init_fp16_builtins ();
18982 }
18984 /* Implement TARGET_INVALID_PARAMETER_TYPE.  */
18986 static const char *
18987 arm_invalid_parameter_type (const_tree t)
18988 {
18989   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18990     return N_("function parameters cannot have __fp16 type");
18991   return NULL;
18992 }
18994 /* Implement TARGET_INVALID_RETURN_TYPE.  */
18996 static const char *
18997 arm_invalid_return_type (const_tree t)
18998 {
18999   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19000     return N_("functions cannot return __fp16 type");
19001   return NULL;
19002 }
19004 /* Implement TARGET_PROMOTED_TYPE.  */
19006 static tree
19007 arm_promoted_type (const_tree t)
19008 {
19009   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19010     return float_type_node;
19011   return NULL_TREE;
19012 }
19014 /* Implement TARGET_CONVERT_TO_TYPE.
19015    Specifically, this hook implements the peculiarity of the ARM
19016    half-precision floating-point C semantics that requires conversions
19017    between __fp16 and double to go through an intermediate conversion
19017    to float.  */
19019 static tree
19020 arm_convert_to_type (tree type, tree expr)
19021 {
19022   tree fromtype = TREE_TYPE (expr);
19023   if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19024     return NULL_TREE;
19025   if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19026       || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19027     return convert (type, convert (float_type_node, expr));
19028   return NULL_TREE;
19029 }
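/* Editorial example: under these semantics a conversion such as

     __fp16 h;
     double d = (double) h;

   is performed as (double) (float) h, and likewise a double-to-__fp16
   conversion narrows through float first.  */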
19031 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19032 This simply adds HFmode as a supported mode; even though we don't
19033 implement arithmetic on this type directly, it's supported by
19034 optabs conversions, much the way the double-word arithmetic is
19035 special-cased in the default hook. */
19037 static bool
19038 arm_scalar_mode_supported_p (enum machine_mode mode)
19039 {
19040   if (mode == HFmode)
19041     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19042   else
19043     return default_scalar_mode_supported_p (mode);
19044 }
19046 /* Errors in the source file can cause expand_expr to return const0_rtx
19047 where we expect a vector. To avoid crashing, use one of the vector
19048 clear instructions. */
19050 static rtx
19051 safe_vector_operand (rtx x, enum machine_mode mode)
19052 {
19053   if (x != const0_rtx)
19054     return x;
19055   x = gen_reg_rtx (mode);
19057   emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19058 			       : gen_rtx_SUBREG (DImode, x, 0)));
19059   return x;
19060 }
19062 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19064 static rtx
19065 arm_expand_binop_builtin (enum insn_code icode,
19066 			  tree exp, rtx target)
19067 {
19068   rtx pat;
19069 tree arg0 = CALL_EXPR_ARG (exp, 0);
19070 tree arg1 = CALL_EXPR_ARG (exp, 1);
19071 rtx op0 = expand_normal (arg0);
19072 rtx op1 = expand_normal (arg1);
19073 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19074 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19075 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19077 if (VECTOR_MODE_P (mode0))
19078 op0 = safe_vector_operand (op0, mode0);
19079 if (VECTOR_MODE_P (mode1))
19080     op1 = safe_vector_operand (op1, mode1);
19082   if (target == 0
19083       || GET_MODE (target) != tmode
19084       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19085     target = gen_reg_rtx (tmode);
19087 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19090 op0 = copy_to_mode_reg (mode0, op0);
19091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19092 op1 = copy_to_mode_reg (mode1, op1);
19094   pat = GEN_FCN (icode) (target, op0, op1);
19095   if (! pat)
19096     return 0;
19097   emit_insn (pat);
19098   return target;
19099 }
19101 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19103 static rtx
19104 arm_expand_unop_builtin (enum insn_code icode,
19105 			 tree exp, rtx target, int do_load)
19106 {
19107   rtx pat;
19108   tree arg0 = CALL_EXPR_ARG (exp, 0);
19109   rtx op0 = expand_normal (arg0);
19110   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19111   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19113   if (target == 0
19114       || GET_MODE (target) != tmode
19115       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19116     target = gen_reg_rtx (tmode);
19117   if (do_load)
19118     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19119   else
19120     {
19121       if (VECTOR_MODE_P (mode0))
19122 	op0 = safe_vector_operand (op0, mode0);
19124       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19125 	op0 = copy_to_mode_reg (mode0, op0);
19126     }
19128   pat = GEN_FCN (icode) (target, op0);
19129   if (! pat)
19130     return 0;
19131   emit_insn (pat);
19132   return target;
19133 }
19134 /* Comparator for bsearch over neon_builtin_data: match KEY's fcode
   against the [base_fcode, base_fcode + num_vars) range of MEMB.  */
19135 static int
19136 neon_builtin_compare (const void *a, const void *b)
19137 {
19138   const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19139   const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19140   unsigned int soughtcode = key->base_fcode;
19142   if (soughtcode >= memb->base_fcode
19143       && soughtcode < memb->base_fcode + memb->num_vars)
19144     return 0;
19145   else if (soughtcode < memb->base_fcode)
19146     return -1;
19147   else
19148     return 1;
19149 }
19151 static enum insn_code
19152 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19153 {
19154   neon_builtin_datum key
19155     = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19156   neon_builtin_datum *found;
19157   int idx;
19159   key.base_fcode = fcode;
19160   found = (neon_builtin_datum *)
19161     bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19162 	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
19163   gcc_assert (found);
19164   idx = fcode - (int) found->base_fcode;
19165   gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19167   if (itype)
19168     *itype = found->itype;
19170   return found->codes[idx];
19171 }
19173 typedef enum {
19174   NEON_ARG_COPY_TO_REG,
19175   NEON_ARG_CONSTANT,
19176   NEON_ARG_STOP
19177 } builtin_arg;
19179 #define NEON_MAX_BUILTIN_ARGS 5
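/* Editorial sketch of the calling convention used below: the variable
   argument list is a series of builtin_arg codes terminated by
   NEON_ARG_STOP, e.g.

     arm_expand_neon_args (target, icode, 1, exp,
			   NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
			   NEON_ARG_STOP);

   for a builtin with a return value, one register operand and one
   immediate operand.  */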
19181 /* Expand a Neon builtin. */
19182 static rtx
19183 arm_expand_neon_args (rtx target, int icode, int have_retval,
19184 		      tree exp, ...)
19185 {
19186   va_list ap;
19187   rtx pat;
19188   tree arg[NEON_MAX_BUILTIN_ARGS];
19189   rtx op[NEON_MAX_BUILTIN_ARGS];
19190   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19191   enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19192   int argc = 0;
19194   if (have_retval
19195       && (!target
19196 	  || GET_MODE (target) != tmode
19197 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19198     target = gen_reg_rtx (tmode);
19200   va_start (ap, exp);
19202   for (;;)
19203     {
19204       builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19206       if (thisarg == NEON_ARG_STOP)
19207 	break;
19208       else
19209 	{
19210 	  arg[argc] = CALL_EXPR_ARG (exp, argc);
19211 	  op[argc] = expand_normal (arg[argc]);
19212 	  mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19214 	  switch (thisarg)
19215 	    {
19216 	    case NEON_ARG_COPY_TO_REG:
19217 	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19218 	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19219 		     (op[argc], mode[argc]))
19220 		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19221 	      break;
19223 	    case NEON_ARG_CONSTANT:
19224 	      /* FIXME: This error message is somewhat unhelpful.  */
19225 	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19226 		    (op[argc], mode[argc]))
19227 		error ("argument must be a constant");
19228 	      break;
19230 	    case NEON_ARG_STOP:
19231 	      gcc_unreachable ();
19232 	    }
19234 	  argc++;
19235 	}
19236     }
19238   va_end (ap);
19240   if (have_retval)
19241     switch (argc)
19242       {
19243       case 1:
19244 	pat = GEN_FCN (icode) (target, op[0]);
19245 	break;
19247       case 2:
19248 	pat = GEN_FCN (icode) (target, op[0], op[1]);
19249 	break;
19251       case 3:
19252 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19253 	break;
19255       case 4:
19256 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19257 	break;
19259       case 5:
19260 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19261 	break;
19263       default:
19264 	gcc_unreachable ();
19265       }
19266   else
19267     switch (argc)
19268       {
19269       case 1:
19270 	pat = GEN_FCN (icode) (op[0]);
19271 	break;
19273       case 2:
19274 	pat = GEN_FCN (icode) (op[0], op[1]);
19275 	break;
19277       case 3:
19278 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19279 	break;
19281       case 4:
19282 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19283 	break;
19285       case 5:
19286 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19287 	break;
19289       default:
19290 	gcc_unreachable ();
19291       }
19293   if (!pat)
19294     return 0;
19296   emit_insn (pat);
19298   return target;
19299 }
19301 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19302 constants defined per-instruction or per instruction-variant. Instead, the
19303 required info is looked up in the table neon_builtin_data. */
19304 static rtx
19305 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19306 {
19307   neon_itype itype;
19308   enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19310   switch (itype)
19311     {
19312     case NEON_UNOP:
19313     case NEON_CONVERT:
19314     case NEON_DUPLANE:
19315       return arm_expand_neon_args (target, icode, 1, exp,
19316 	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19318     case NEON_BINOP:
19319     case NEON_SETLANE:
19320 case NEON_SCALARMUL:
19321 case NEON_SCALARMULL:
19322 case NEON_SCALARMULH:
19323 case NEON_SHIFTINSERT:
19324 case NEON_LOGICBINOP:
19325 return arm_expand_neon_args (target, icode, 1, exp,
19326 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19327 	NEON_ARG_STOP);
19329     case NEON_TERNOP:
19330 return arm_expand_neon_args (target, icode, 1, exp,
19331 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19332 	NEON_ARG_CONSTANT, NEON_ARG_STOP);
19334     case NEON_GETLANE:
19335     case NEON_FIXCONV:
19336 case NEON_SHIFTIMM:
19337 return arm_expand_neon_args (target, icode, 1, exp,
19338 	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19339 	NEON_ARG_STOP);
19341     case NEON_CREATE:
19342 return arm_expand_neon_args (target, icode, 1, exp,
19343 	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19345     case NEON_DUP:
19346     case NEON_SPLIT:
19347 case NEON_REINTERP:
19348 return arm_expand_neon_args (target, icode, 1, exp,
19349 	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19351     case NEON_COMBINE:
19352     case NEON_VTBL:
19353 return arm_expand_neon_args (target, icode, 1, exp,
19354 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19356 case NEON_RESULTPAIR:
19357 return arm_expand_neon_args (target, icode, 0, exp,
19358 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19359 	NEON_ARG_STOP);
19361     case NEON_LANEMUL:
19362 case NEON_LANEMULL:
19363 case NEON_LANEMULH:
19364 return arm_expand_neon_args (target, icode, 1, exp,
19365 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19366 	NEON_ARG_CONSTANT, NEON_ARG_STOP);
19368     case NEON_LANEMAC:
19369 return arm_expand_neon_args (target, icode, 1, exp,
19370 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19371 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19373 case NEON_SHIFTACC:
19374 return arm_expand_neon_args (target, icode, 1, exp,
19375 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19376 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19378 case NEON_SCALARMAC:
19379 return arm_expand_neon_args (target, icode, 1, exp,
19380 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19381 	NEON_ARG_CONSTANT, NEON_ARG_STOP);
19383     case NEON_SELECT:
19384     case NEON_VTBX:
19385 return arm_expand_neon_args (target, icode, 1, exp,
19386 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19387 	NEON_ARG_STOP);
19389     case NEON_LOAD1:
19390 case NEON_LOADSTRUCT:
19391 return arm_expand_neon_args (target, icode, 1, exp,
19392 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19394 case NEON_LOAD1LANE:
19395 case NEON_LOADSTRUCTLANE:
19396 return arm_expand_neon_args (target, icode, 1, exp,
19397 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19398 	NEON_ARG_STOP);
19400     case NEON_STORE1:
19401 case NEON_STORESTRUCT:
19402 return arm_expand_neon_args (target, icode, 0, exp,
19403 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19405 case NEON_STORE1LANE:
19406 case NEON_STORESTRUCTLANE:
19407 return arm_expand_neon_args (target, icode, 0, exp,
19408 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19409 	NEON_ARG_STOP);
19410     }
19412   gcc_unreachable ();
19413 }
19415 /* Emit code to reinterpret one Neon type as another, without altering bits.  */
19416 void
19417 neon_reinterpret (rtx dest, rtx src)
19418 {
19419   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19420 }
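/* Editorial note: this is the expansion behind the vreinterpret*
   intrinsics; e.g. vreinterpret_s16_s8 costs no instructions, being just
   a mode change on the same 64-bit register.  */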
19422 /* Emit code to place a Neon pair result in memory locations (with equal
19423    registers).  */
19424 void
19425 neon_emit_pair_result_insn (enum machine_mode mode,
19426 			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19427 			    rtx op1, rtx op2)
19428 {
19429   rtx mem = gen_rtx_MEM (mode, destaddr);
19430   rtx tmp1 = gen_reg_rtx (mode);
19431   rtx tmp2 = gen_reg_rtx (mode);
19433   emit_insn (intfn (tmp1, op1, tmp2, op2));
19435   emit_move_insn (mem, tmp1);
19436   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19437   emit_move_insn (mem, tmp2);
19438 }
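/* Editorial sketch: this helper serves the RESULTPAIR patterns
   (vtrn/vzip/vuzp and friends); a generator would be passed roughly as

     neon_emit_pair_result_insn (V4HImode, gen_vtrn_internal, destaddr,
				 op1, op2);

   (gen_vtrn_internal is a hypothetical generator name used only for
   illustration), leaving the two result vectors at destaddr and
   destaddr + GET_MODE_SIZE (mode).  */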
19440 /* Set up operands for a register copy from src to dest, taking care not to
19441 clobber registers in the process.
19442 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19443 be called with a large N, so that should be OK. */
19445 void
19446 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19447 {
19448   unsigned int copied = 0, opctr = 0;
19449   unsigned int done = (1 << count) - 1;
19450   unsigned int i, j;
19452   while (copied != done)
19453     {
19454       for (i = 0; i < count; i++)
19455 	{
19456 	  int good = 1;
19458 	  for (j = 0; good && j < count; j++)
19459 	    if (i != j && (copied & (1 << j)) == 0
19460 		&& reg_overlap_mentioned_p (src[j], dest[i]))
19461 	      good = 0;
19463 	  if (good && ((copied & (1 << i)) == 0))
19464 	    {
19465 	      operands[opctr++] = dest[i];
19466 	      operands[opctr++] = src[i];
19467 	      copied |= 1 << i;
19468 	    }
19469 	}
19470     }
19472   gcc_assert (opctr == count * 2);
19473 }
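/* Editorial example: with dest = {d2, d3} and src = {d1, d2}, emitting
   d2 = d1 first would clobber the d2 still needed as a source; the loop
   above therefore orders the pairs as d3 = d2 first, then d2 = d1.  */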
19475 /* Expand an expression EXP that calls a built-in function,
19476 with result going to TARGET if that's convenient
19477 (and in mode MODE if that's convenient).
19478 SUBTARGET may be used as the target for computing one of EXP's operands.
19479 IGNORE is nonzero if the value is to be ignored. */
19481 static rtx
19482 arm_expand_builtin (tree exp,
19483 		    rtx target,
19484 		    rtx subtarget ATTRIBUTE_UNUSED,
19485 		    enum machine_mode mode ATTRIBUTE_UNUSED,
19486 		    int ignore ATTRIBUTE_UNUSED)
19487 {
19488   const struct builtin_description * d;
19489   enum insn_code    icode;
19490   tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19491   tree              arg0;
19492   tree              arg1;
19493   tree              arg2;
19494   rtx               op0;
19495   rtx               op1;
19496   rtx               op2;
19497   rtx               pat;
19498   int               fcode = DECL_FUNCTION_CODE (fndecl);
19499   size_t            i;
19500 enum machine_mode tmode;
19501 enum machine_mode mode0;
19502 enum machine_mode mode1;
19503 enum machine_mode mode2;
19505 if (fcode >= ARM_BUILTIN_NEON_BASE)
19506     return arm_expand_neon_builtin (fcode, exp, target);
19508   switch (fcode)
19509     {
19510 case ARM_BUILTIN_TEXTRMSB:
19511 case ARM_BUILTIN_TEXTRMUB:
19512 case ARM_BUILTIN_TEXTRMSH:
19513 case ARM_BUILTIN_TEXTRMUH:
19514 case ARM_BUILTIN_TEXTRMSW:
19515 case ARM_BUILTIN_TEXTRMUW:
19516 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19517 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19518 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19519 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19520 : CODE_FOR_iwmmxt_textrmw);
19522 arg0 = CALL_EXPR_ARG (exp, 0);
19523 arg1 = CALL_EXPR_ARG (exp, 1);
19524 op0 = expand_normal (arg0);
19525 op1 = expand_normal (arg1);
19526 tmode = insn_data[icode].operand[0].mode;
19527 mode0 = insn_data[icode].operand[1].mode;
19528 mode1 = insn_data[icode].operand[2].mode;
19530 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19531 op0 = copy_to_mode_reg (mode0, op0);
19532 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19533 	{
19534 	  /* @@@ better error message */
19535 	  error ("selector must be an immediate");
19536 	  return gen_reg_rtx (tmode);
19537 	}
19538       if (target == 0
19539 	  || GET_MODE (target) != tmode
19540 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19541 target = gen_reg_rtx (tmode);
19542 pat = GEN_FCN (icode) (target, op0, op1);
19548 case ARM_BUILTIN_TINSRB:
19549 case ARM_BUILTIN_TINSRH:
19550 case ARM_BUILTIN_TINSRW:
19551 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19552 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19553 : CODE_FOR_iwmmxt_tinsrw);
19554 arg0 = CALL_EXPR_ARG (exp, 0);
19555 arg1 = CALL_EXPR_ARG (exp, 1);
19556 arg2 = CALL_EXPR_ARG (exp, 2);
19557 op0 = expand_normal (arg0);
19558 op1 = expand_normal (arg1);
19559 op2 = expand_normal (arg2);
19560 tmode = insn_data[icode].operand[0].mode;
19561 mode0 = insn_data[icode].operand[1].mode;
19562 mode1 = insn_data[icode].operand[2].mode;
19563 mode2 = insn_data[icode].operand[3].mode;
19565 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19566 op0 = copy_to_mode_reg (mode0, op0);
19567 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19568 op1 = copy_to_mode_reg (mode1, op1);
19569 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19571 /* @@@ better error message */
19572 error ("selector must be an immediate");
19576 || GET_MODE (target) != tmode
19577 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19578 target = gen_reg_rtx (tmode);
19579 pat = GEN_FCN (icode) (target, op0, op1, op2);
19585 case ARM_BUILTIN_SETWCX:
19586 arg0 = CALL_EXPR_ARG (exp, 0);
19587 arg1 = CALL_EXPR_ARG (exp, 1);
19588 op0 = force_reg (SImode, expand_normal (arg0));
19589 op1 = expand_normal (arg1);
19590 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19593 case ARM_BUILTIN_GETWCX:
19594 arg0 = CALL_EXPR_ARG (exp, 0);
19595 op0 = expand_normal (arg0);
19596 target = gen_reg_rtx (SImode);
19597 emit_insn (gen_iwmmxt_tmrc (target, op0));
19600 case ARM_BUILTIN_WSHUFH:
19601 icode = CODE_FOR_iwmmxt_wshufh;
19602 arg0 = CALL_EXPR_ARG (exp, 0);
19603 arg1 = CALL_EXPR_ARG (exp, 1);
19604 op0 = expand_normal (arg0);
19605 op1 = expand_normal (arg1);
19606 tmode = insn_data[icode].operand[0].mode;
19607 mode1 = insn_data[icode].operand[1].mode;
19608 mode2 = insn_data[icode].operand[2].mode;
19610 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19611 op0 = copy_to_mode_reg (mode1, op0);
19612 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19614 /* @@@ better error message */
19615 error ("mask must be an immediate");
19619 || GET_MODE (target) != tmode
19620 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19621 target = gen_reg_rtx (tmode);
19622 pat = GEN_FCN (icode) (target, op0, op1);
19628 case ARM_BUILTIN_WSADB:
19629 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19630 case ARM_BUILTIN_WSADH:
19631 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19632 case ARM_BUILTIN_WSADBZ:
19633 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19634 case ARM_BUILTIN_WSADHZ:
19635 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19637 /* Several three-argument builtins. */
19638 case ARM_BUILTIN_WMACS:
19639 case ARM_BUILTIN_WMACU:
19640 case ARM_BUILTIN_WALIGN:
19641 case ARM_BUILTIN_TMIA:
19642 case ARM_BUILTIN_TMIAPH:
19643 case ARM_BUILTIN_TMIATT:
19644 case ARM_BUILTIN_TMIATB:
19645 case ARM_BUILTIN_TMIABT:
19646 case ARM_BUILTIN_TMIABB:
19647 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19648 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19649 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19650 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19651 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19652 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19653 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19654 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19655 : CODE_FOR_iwmmxt_walign);
19656 arg0 = CALL_EXPR_ARG (exp, 0);
19657 arg1 = CALL_EXPR_ARG (exp, 1);
19658 arg2 = CALL_EXPR_ARG (exp, 2);
19659 op0 = expand_normal (arg0);
19660 op1 = expand_normal (arg1);
19661 op2 = expand_normal (arg2);
19662 tmode = insn_data[icode].operand[0].mode;
19663 mode0 = insn_data[icode].operand[1].mode;
19664 mode1 = insn_data[icode].operand[2].mode;
19665 mode2 = insn_data[icode].operand[3].mode;
19667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19668 op0 = copy_to_mode_reg (mode0, op0);
19669 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19670 op1 = copy_to_mode_reg (mode1, op1);
19671 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19672 op2 = copy_to_mode_reg (mode2, op2);
19674 || GET_MODE (target) != tmode
19675 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19676 target = gen_reg_rtx (tmode);
19677 pat = GEN_FCN (icode) (target, op0, op1, op2);
19683 case ARM_BUILTIN_WZERO:
19684 target = gen_reg_rtx (DImode);
19685 emit_insn (gen_iwmmxt_clrdi (target));
19688 case ARM_BUILTIN_THREAD_POINTER:
19689 return arm_load_tp (target);
19695 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19696 if (d->code == (const enum arm_builtins) fcode)
19697 return arm_expand_binop_builtin (d->icode, exp, target);
19699 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19700 if (d->code == (const enum arm_builtins) fcode)
19701 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19703 /* @@@ Should really do something sensible here. */
19707 /* Return the number (counting from 0) of
19708 the least significant set bit in MASK. */
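/* For example, with MASK == 0x18 (binary 11000) the least significant
   set bit is bit 3, so the result is 3. For nonzero MASK this matches
   GCC's __builtin_ctz; the open-coded loop avoids relying on the host
   compiler providing that builtin. */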
19711 number_of_first_bit_set (unsigned mask)
19716 (mask & (1 << bit)) == 0;
19723 /* Emit code to push or pop registers to or from the stack. F is the
19724 assembly file. MASK is the registers to push or pop. PUSH is
19725 nonzero if we should push, and zero if we should pop. For debugging
19726 output, if pushing, adjust CFA_OFFSET by the amount of space added
19727 to the stack. REAL_REGS should have the same number of bits set as
19728 MASK, and will be used instead (in the same order) to describe which
19729 registers were saved - this is used to mark the save slots when we
19730 push high registers after moving them to low registers. */
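/* Illustratively, a call with MASK == (1 << 0) | (1 << 1) | (1 << LR_REGNUM)
   and PUSH nonzero emits "push {r0, r1, lr}" and bumps *CFA_OFFSET by 12
   (sketch only; pops that include the PC may instead be diverted to
   thumb_exit, as handled below). */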
19732 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19733 unsigned long real_regs)
19736 int lo_mask = mask & 0xFF;
19737 int pushed_words = 0;
19741 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19743 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit. */
19745 thumb_exit (f, -1);
19749 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19751 fprintf (f, "\t.save\t{");
19752 for (regno = 0; regno < 15; regno++)
19754 if (real_regs & (1 << regno))
19756 if (real_regs & ((1 << regno) - 1))
19758 asm_fprintf (f, "%r", regno);
19761 fprintf (f, "}\n");
19764 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19766 /* Look at the low registers first. */
19767 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19771 asm_fprintf (f, "%r", regno);
19773 if ((lo_mask & ~1) != 0)
19780 if (push && (mask & (1 << LR_REGNUM)))
19782 /* Catch pushing the LR. */
19786 asm_fprintf (f, "%r", LR_REGNUM);
19790 else if (!push && (mask & (1 << PC_REGNUM)))
19792 /* Catch popping the PC. */
19793 if (TARGET_INTERWORK || TARGET_BACKTRACE
19794 || crtl->calls_eh_return)
19796 /* The PC is never popped directly; instead
19797 it is popped into r3 and then BX is used. */
19798 fprintf (f, "}\n");
19800 thumb_exit (f, -1);
19809 asm_fprintf (f, "%r", PC_REGNUM);
19813 fprintf (f, "}\n");
19815 if (push && pushed_words && dwarf2out_do_frame ())
19817 char *l = dwarf2out_cfi_label (false);
19818 int pushed_mask = real_regs;
19820 *cfa_offset += pushed_words * 4;
19821 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19824 pushed_mask = real_regs;
19825 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19827 if (pushed_mask & 1)
19828 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19833 /* Generate code to return from a thumb function.
19834 If 'reg_containing_return_addr' is -1, then the return address is
19835 actually on the stack, at the stack pointer. */
19837 thumb_exit (FILE *f, int reg_containing_return_addr)
19839 unsigned regs_available_for_popping;
19840 unsigned regs_to_pop;
19842 unsigned available;
19846 int restore_a4 = FALSE;
19848 /* Compute the registers we need to pop. */
19852 if (reg_containing_return_addr == -1)
19854 regs_to_pop |= 1 << LR_REGNUM;
19858 if (TARGET_BACKTRACE)
19860 /* Restore the (ARM) frame pointer and stack pointer. */
19861 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19865 /* If there is nothing to pop then just emit the BX instruction and return. */
19867 if (pops_needed == 0)
19869 if (crtl->calls_eh_return)
19870 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19872 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19875 /* Otherwise if we are not supporting interworking and we have not created
19876 a backtrace structure and the function was not entered in ARM mode then
19877 just pop the return address straight into the PC. */
19878 else if (!TARGET_INTERWORK
19879 && !TARGET_BACKTRACE
19880 && !is_called_in_ARM_mode (current_function_decl)
19881 && !crtl->calls_eh_return)
19883 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19887 /* Find out how many of the (return) argument registers we can corrupt. */
19888 regs_available_for_popping = 0;
19890 /* If returning via __builtin_eh_return, the bottom three registers
19891 all contain information needed for the return. */
19892 if (crtl->calls_eh_return)
19896 /* Deduce the registers used from the function's return value
19897 where possible. This is more reliable than examining
19898 df_regs_ever_live_p () because that will be set if the register is
19899 ever used in the function, not just if the register is used
19900 to hold a return value. */
19902 if (crtl->return_rtx != 0)
19903 mode = GET_MODE (crtl->return_rtx);
19905 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19907 size = GET_MODE_SIZE (mode);
19911 /* In a void function we can use any argument register.
19912 In a function that returns a structure on the stack
19913 we can use the second and third argument registers. */
19914 if (mode == VOIDmode)
19915 regs_available_for_popping =
19916 (1 << ARG_REGISTER (1))
19917 | (1 << ARG_REGISTER (2))
19918 | (1 << ARG_REGISTER (3));
19920 regs_available_for_popping =
19921 (1 << ARG_REGISTER (2))
19922 | (1 << ARG_REGISTER (3));
19924 else if (size <= 4)
19925 regs_available_for_popping =
19926 (1 << ARG_REGISTER (2))
19927 | (1 << ARG_REGISTER (3));
19928 else if (size <= 8)
19929 regs_available_for_popping =
19930 (1 << ARG_REGISTER (3));
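/* Reading the size tests above (illustrative): a function returning a
   word-sized value in r0 leaves r1 and r2 free for popping, one
   returning a doubleword in r0/r1 leaves only r2, and a void function
   leaves r0-r2. The matching loop below pairs each register to pop
   with a popping register using the X & -X idiom, which isolates the
   least significant set bit of X in two's complement (e.g.
   0x06 & -0x06 == 0x02), so each iteration consumes one bit from each
   mask. */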
19933 /* Match registers to be popped with registers into which we pop them. */
19934 for (available = regs_available_for_popping,
19935 required = regs_to_pop;
19936 required != 0 && available != 0;
19937 available &= ~(available & - available),
19938 required &= ~(required & - required))
19941 /* If we have any popping registers left over, remove them. */
19943 regs_available_for_popping &= ~available;
19945 /* Otherwise if we need another popping register we can use
19946 the fourth argument register. */
19947 else if (pops_needed)
19949 /* If we have not found any free argument registers and
19950 reg a4 contains the return address, we must move it. */
19951 if (regs_available_for_popping == 0
19952 && reg_containing_return_addr == LAST_ARG_REGNUM)
19954 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19955 reg_containing_return_addr = LR_REGNUM;
19957 else if (size > 12)
19959 /* Register a4 is being used to hold part of the return value,
19960 but we have dire need of a free, low register. */
19963 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19966 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19968 /* The fourth argument register is available. */
19969 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19975 /* Pop as many registers as we can. */
19976 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19977 regs_available_for_popping);
19979 /* Process the registers we popped. */
19980 if (reg_containing_return_addr == -1)
19982 /* The return address was popped into the lowest numbered register. */
19983 regs_to_pop &= ~(1 << LR_REGNUM);
19985 reg_containing_return_addr =
19986 number_of_first_bit_set (regs_available_for_popping);
19988 /* Remove this register from the mask of available registers, so that
19989 the return address will not be corrupted by further pops. */
19990 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19993 /* If we popped other registers then handle them here. */
19994 if (regs_available_for_popping)
19998 /* Work out which register currently contains the frame pointer. */
19999 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20001 /* Move it into the correct place. */
20002 asm_fprintf (f, "\tmov\t%r, %r\n",
20003 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20005 /* (Temporarily) remove it from the mask of popped registers. */
20006 regs_available_for_popping &= ~(1 << frame_pointer);
20007 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20009 if (regs_available_for_popping)
20013 /* We popped the stack pointer as well,
20014 find the register that contains it. */
20015 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20017 /* Move it into the stack register. */
20018 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20020 /* At this point we have popped all necessary registers, so
20021 do not worry about restoring regs_available_for_popping
20022 to its correct value:
20024 assert (pops_needed == 0)
20025 assert (regs_available_for_popping == (1 << frame_pointer))
20026 assert (regs_to_pop == (1 << STACK_POINTER)) */
20030 /* Since we have just moved the popped value into the frame
20031 pointer, the popping register is available for reuse, and
20032 we know that we still have the stack pointer left to pop. */
20033 regs_available_for_popping |= (1 << frame_pointer);
20037 /* If we still have registers left on the stack, but we no longer have
20038 any registers into which we can pop them, then we must move the return
20039 address into the link register and make available the register that contained it. */
20041 if (regs_available_for_popping == 0 && pops_needed > 0)
20043 regs_available_for_popping |= 1 << reg_containing_return_addr;
20045 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20046 reg_containing_return_addr);
20048 reg_containing_return_addr = LR_REGNUM;
20051 /* If we have registers left on the stack then pop some more.
20052 We know that at most we will want to pop FP and SP. */
20053 if (pops_needed > 0)
20058 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20059 regs_available_for_popping);
20061 /* We have popped either FP or SP.
20062 Move whichever one it is into the correct register. */
20063 popped_into = number_of_first_bit_set (regs_available_for_popping);
20064 move_to = number_of_first_bit_set (regs_to_pop);
20066 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20068 regs_to_pop &= ~(1 << move_to);
20073 /* If we still have not popped everything then we must have only
20074 had one register available to us and we are now popping the SP. */
20075 if (pops_needed > 0)
20079 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20080 regs_available_for_popping);
20082 popped_into = number_of_first_bit_set (regs_available_for_popping);
20084 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20086 assert (regs_to_pop == (1 << STACK_POINTER))
20087 assert (pops_needed == 1)
20091 /* If necessary restore the a4 register. */
20094 if (reg_containing_return_addr != LR_REGNUM)
20096 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20097 reg_containing_return_addr = LR_REGNUM;
20100 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20103 if (crtl->calls_eh_return)
20104 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20106 /* Return to caller. */
20107 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20110 /* Scan INSN just before assembler is output for it.
20111 For Thumb-1, we track the status of the condition codes; this
20112 information is used in the cbranchsi4_insn pattern. */
20114 thumb1_final_prescan_insn (rtx insn)
20116 if (flag_print_asm_name)
20117 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20118 INSN_ADDRESSES (INSN_UID (insn)));
20119 /* Don't overwrite the previous setter when we get to a cbranch. */
20120 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20122 enum attr_conds conds;
20124 if (cfun->machine->thumb1_cc_insn)
20126 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20127 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20130 conds = get_attr_conds (insn);
20131 if (conds == CONDS_SET)
20133 rtx set = single_set (insn);
20134 cfun->machine->thumb1_cc_insn = insn;
20135 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20136 cfun->machine->thumb1_cc_op1 = const0_rtx;
20137 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20138 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20140 rtx src1 = XEXP (SET_SRC (set), 1);
20141 if (src1 == const0_rtx)
20142 cfun->machine->thumb1_cc_mode = CCmode;
20145 else if (conds != CONDS_NOCOND)
20146 cfun->machine->thumb1_cc_insn = NULL_RTX;
20151 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20153 unsigned HOST_WIDE_INT mask = 0xff;
20156 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20157 if (val == 0) /* XXX */
20160 for (i = 0; i < 25; i++)
20161 if ((val & (mask << i)) == val)
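/* For example, 0x00ff0000 (0xff << 16) is a shiftable constant, while
   0x101 is not, since it needs nine significant bits. The loop bound
   of 25 reflects the 32 - 8 + 1 possible positions of an 8-bit field
   in a 32-bit word. */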
20167 /* Returns nonzero if the current function contains,
20168 or might contain, a far jump. */
20170 thumb_far_jump_used_p (void)
20174 /* This test is only important for leaf functions. */
20175 /* assert (!leaf_function_p ()); */
20177 /* If we have already decided that far jumps may be used,
20178 do not bother checking again, and always return true even if
20179 it turns out that they are not being used. Once we have made
20180 the decision that far jumps are present (and that hence the link
20181 register will be pushed onto the stack) we cannot go back on it. */
20182 if (cfun->machine->far_jump_used)
20185 /* If this function is not being called from the prologue/epilogue
20186 generation code then it must be being called from the
20187 INITIAL_ELIMINATION_OFFSET macro. */
20188 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20190 /* In this case we know that we are being asked about the elimination
20191 of the arg pointer register. If that register is not being used,
20192 then there are no arguments on the stack, and we do not have to
20193 worry that a far jump might force the prologue to push the link
20194 register, changing the stack offsets. In this case we can just
20195 return false, since the presence of far jumps in the function will
20196 not affect stack offsets.
20198 If the arg pointer is live (or if it was live, but has now been
20199 eliminated and so set to dead) then we do have to test to see if
20200 the function might contain a far jump. This test can lead to some
20201 false negatives, since before reload is completed, the length of
20202 branch instructions is not known, so gcc defaults to returning their
20203 longest length, which in turn sets the far jump attribute to true.
20205 A false negative will not result in bad code being generated, but it
20206 will result in a needless push and pop of the link register. We
20207 hope that this does not occur too often.
20209 If we need doubleword stack alignment this could affect the other
20210 elimination offsets so we can't risk getting it wrong. */
20211 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20212 cfun->machine->arg_pointer_live = 1;
20213 else if (!cfun->machine->arg_pointer_live)
20217 /* Check to see if the function contains a branch
20218 insn with the far jump attribute set. */
20219 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20221 if (GET_CODE (insn) == JUMP_INSN
20222 /* Ignore tablejump patterns. */
20223 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20224 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20225 && get_attr_far_jump (insn) == FAR_JUMP_YES
20228 /* Record the fact that we have decided that
20229 the function does use far jumps. */
20230 cfun->machine->far_jump_used = 1;
20238 /* Return nonzero if FUNC must be entered in ARM mode. */
20240 is_called_in_ARM_mode (tree func)
20242 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20244 /* Ignore the problem of functions whose address is taken. */
20245 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20249 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20255 /* Given the stack offsets and register mask in OFFSETS, decide how
20256 many additional registers to push instead of subtracting a constant
20257 from SP. For epilogues the principle is the same except we use pop.
20258 FOR_PROLOGUE indicates which we're generating. */
20260 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20262 HOST_WIDE_INT amount;
20263 unsigned long live_regs_mask = offsets->saved_regs_mask;
20264 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction. */
20266 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20267 /* Then count how many other high registers will need to be pushed. */
20268 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20269 int n_free, reg_base;
20271 if (!for_prologue && frame_pointer_needed)
20272 amount = offsets->locals_base - offsets->saved_regs;
20274 amount = offsets->outgoing_args - offsets->saved_regs;
20276 /* If the stack frame size is 512 exactly, we can save one load
20277 instruction, which should make this a win even when optimizing for speed. */
20279 if (!optimize_size && amount != 512)
20282 /* Can't do this if there are high registers to push. */
20283 if (high_regs_pushed != 0)
20286 /* Shouldn't do it in the prologue if no registers would normally
20287 be pushed at all. In the epilogue, also allow it if we'll have
20288 a pop insn for the PC. */
20291 || TARGET_BACKTRACE
20292 || (live_regs_mask & 1 << LR_REGNUM) == 0
20293 || TARGET_INTERWORK
20294 || crtl->args.pretend_args_size != 0))
20297 /* Don't do this if thumb_expand_prologue wants to emit instructions
20298 between the push and the stack frame allocation. */
20300 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20301 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20308 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20309 live_regs_mask >>= reg_base;
20312 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20313 && (for_prologue || call_used_regs[reg_base + n_free]))
20315 live_regs_mask >>= 1;
20321 gcc_assert (amount / 4 * 4 == amount);
20323 if (amount >= 512 && (amount - n_free * 4) < 512)
20324 return (amount - 508) / 4;
20325 if (amount <= n_free * 4)
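/* Worked examples (illustrative): with a 16-byte frame and four free
   low registers the function returns 4, so four extra registers are
   pushed instead of a "sub sp, #16"; with a 516-byte frame and two
   free registers it returns (516 - 508) / 4 = 2, bringing the
   remaining adjustment back below 512, where a single immediate
   suffices. */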
20330 /* The bits which aren't usefully expanded as rtl. */
20332 thumb_unexpanded_epilogue (void)
20334 arm_stack_offsets *offsets;
20336 unsigned long live_regs_mask = 0;
20337 int high_regs_pushed = 0;
20339 int had_to_push_lr;
20342 if (cfun->machine->return_used_this_function != 0)
20345 if (IS_NAKED (arm_current_func_type ()))
20348 offsets = arm_get_frame_offsets ();
20349 live_regs_mask = offsets->saved_regs_mask;
20350 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20352 /* Deduce the registers used from the function's return value where possible.
20353 This is more reliable than examining df_regs_ever_live_p () because that
20354 will be set if the register is ever used in the function, not just if
20355 the register is used to hold a return value. */
20356 size = arm_size_return_regs ();
20358 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20361 unsigned long extra_mask = (1 << extra_pop) - 1;
20362 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20365 /* The prologue may have pushed some high registers to use as
20366 work registers, e.g. the testsuite file
20367 gcc/testsuite/gcc.c-torture/execute/complex-2.c
20368 compiles to produce:
20369 push {r4, r5, r6, r7, lr}
20373 as part of the prologue. We have to undo that pushing here. */
20375 if (high_regs_pushed)
20377 unsigned long mask = live_regs_mask & 0xff;
20380 /* The available low registers depend on the size of the value we are returning. */
20388 /* Oh dear! We have no low registers into which we can pop the high registers. */
20391 ("no low registers available for popping high registers");
20393 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20394 if (live_regs_mask & (1 << next_hi_reg))
20397 while (high_regs_pushed)
20399 /* Find lo register(s) into which the high register(s) can be popped. */
20401 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20403 if (mask & (1 << regno))
20404 high_regs_pushed--;
20405 if (high_regs_pushed == 0)
20409 mask &= (2 << regno) - 1; /* A no-op if regno == LAST_LO_REGNUM. */
20411 /* Pop the values into the low register(s). */
20412 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20414 /* Move the value(s) into the high registers. */
20415 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20417 if (mask & (1 << regno))
20419 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20422 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20423 if (live_regs_mask & (1 << next_hi_reg))
20428 live_regs_mask &= ~0x0f00;
20431 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20432 live_regs_mask &= 0xff;
20434 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20436 /* Pop the return address into the PC. */
20437 if (had_to_push_lr)
20438 live_regs_mask |= 1 << PC_REGNUM;
20440 /* Either no argument registers were pushed or a backtrace
20441 structure was created which includes an adjusted stack
20442 pointer, so just pop everything. */
20443 if (live_regs_mask)
20444 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20447 /* We have either just popped the return address into the
20448 PC or it was kept in LR for the entire function.
20449 Note that thumb_pushpop has already called thumb_exit if the
20450 PC was in the list. */
20451 if (!had_to_push_lr)
20452 thumb_exit (asm_out_file, LR_REGNUM);
20456 /* Pop everything but the return address. */
20457 if (live_regs_mask)
20458 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20461 if (had_to_push_lr)
20465 /* We have no free low regs, so save one. */
20466 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20470 /* Get the return address into a temporary register. */
20471 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20472 1 << LAST_ARG_REGNUM);
20476 /* Move the return address to lr. */
20477 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20479 /* Restore the low register. */
20480 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20485 regno = LAST_ARG_REGNUM;
20490 /* Remove the argument registers that were pushed onto the stack. */
20491 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20492 SP_REGNUM, SP_REGNUM,
20493 crtl->args.pretend_args_size);
20495 thumb_exit (asm_out_file, regno);
20501 /* Functions to save and restore machine-specific function data. */
20502 static struct machine_function *
20503 arm_init_machine_status (void)
20505 struct machine_function *machine;
20506 machine = ggc_alloc_cleared_machine_function ();
20508 #if ARM_FT_UNKNOWN != 0
20509 machine->func_type = ARM_FT_UNKNOWN;
20514 /* Return an RTX indicating where the return address to the
20515 calling function can be found. */
20517 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20522 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20525 /* Do anything needed before RTL is emitted for each function. */
20527 arm_init_expanders (void)
20529 /* Arrange to initialize and mark the machine per-function status. */
20530 init_machine_status = arm_init_machine_status;
20532 /* This is to stop the combine pass optimizing away the alignment
20533 adjustment of va_arg. */
20534 /* ??? It is claimed that this should not be necessary. */
20536 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20540 /* Like arm_compute_initial_elimination_offset. Simpler because there
20541 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20542 to point at the base of the local variables after static stack
20543 space for a function has been allocated. */
20546 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20548 arm_stack_offsets *offsets;
20550 offsets = arm_get_frame_offsets ();
20554 case ARG_POINTER_REGNUM:
20557 case STACK_POINTER_REGNUM:
20558 return offsets->outgoing_args - offsets->saved_args;
20560 case FRAME_POINTER_REGNUM:
20561 return offsets->soft_frame - offsets->saved_args;
20563 case ARM_HARD_FRAME_POINTER_REGNUM:
20564 return offsets->saved_regs - offsets->saved_args;
20566 case THUMB_HARD_FRAME_POINTER_REGNUM:
20567 return offsets->locals_base - offsets->saved_args;
20570 gcc_unreachable ();
20574 case FRAME_POINTER_REGNUM:
20577 case STACK_POINTER_REGNUM:
20578 return offsets->outgoing_args - offsets->soft_frame;
20580 case ARM_HARD_FRAME_POINTER_REGNUM:
20581 return offsets->saved_regs - offsets->soft_frame;
20583 case THUMB_HARD_FRAME_POINTER_REGNUM:
20584 return offsets->locals_base - offsets->soft_frame;
20587 gcc_unreachable ();
20592 gcc_unreachable ();
20596 /* Generate the rest of a function's prologue. */
20598 thumb1_expand_prologue (void)
20602 HOST_WIDE_INT amount;
20603 arm_stack_offsets *offsets;
20604 unsigned long func_type;
20606 unsigned long live_regs_mask;
20608 func_type = arm_current_func_type ();
20610 /* Naked functions don't have prologues. */
20611 if (IS_NAKED (func_type))
20614 if (IS_INTERRUPT (func_type))
20616 error ("interrupt Service Routines cannot be coded in Thumb mode");
20620 offsets = arm_get_frame_offsets ();
20621 live_regs_mask = offsets->saved_regs_mask;
20622 /* Load the pic register before setting the frame pointer,
20623 so we can use r7 as a temporary work register. */
20624 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20625 arm_load_pic_register (live_regs_mask);
20627 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20628 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20629 stack_pointer_rtx);
20631 amount = offsets->outgoing_args - offsets->saved_regs;
20632 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20637 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20638 GEN_INT (- amount)));
20639 RTX_FRAME_RELATED_P (insn) = 1;
20645 /* The stack decrement is too big for an immediate value in a single
20646 insn. In theory we could issue multiple subtracts, but after
20647 three of them it becomes more space efficient to place the full
20648 value in the constant pool and load it into a register. (Also the
20649 ARM debugger really likes to see only one stack decrement per
20650 function.) So instead we look for a scratch register into which
20651 we can load the decrement, and then we subtract this from the
20652 stack pointer. Unfortunately on Thumb the only available
20653 scratch registers are the argument registers, and we cannot use
20654 these as they may hold arguments to the function. Instead we
20655 attempt to locate a call-preserved register which is used by this
20656 function. If we can find one, then we know that it will have
20657 been pushed at the start of the prologue and so we can corrupt it here. */
20659 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20660 if (live_regs_mask & (1 << regno))
20663 gcc_assert (regno <= LAST_LO_REGNUM);
20665 reg = gen_rtx_REG (SImode, regno);
20667 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20669 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20670 stack_pointer_rtx, reg));
20671 RTX_FRAME_RELATED_P (insn) = 1;
20672 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20673 plus_constant (stack_pointer_rtx,
20675 RTX_FRAME_RELATED_P (dwarf) = 1;
20676 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
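/* Schematically, the sequence built above is:
     ldr rN, .Lpool    @ rN := -amount, loaded from the constant pool
     add sp, sp, rN
   where rN is the call-saved low register located by the loop (a
   sketch of the emitted code, not additional logic). */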
20680 if (frame_pointer_needed)
20681 thumb_set_frame_pointer (offsets);
20683 /* If we are profiling, make sure no instructions are scheduled before
20684 the call to mcount. Similarly if the user has requested no
20685 scheduling in the prologue. Similarly if we want non-call exceptions
20686 using the EABI unwinder, to prevent faulting instructions from being
20687 swapped with a stack adjustment. */
20688 if (crtl->profile || !TARGET_SCHED_PROLOG
20689 || (arm_except_unwind_info (&global_options) == UI_TARGET
20690 && cfun->can_throw_non_call_exceptions))
20691 emit_insn (gen_blockage ());
20693 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20694 if (live_regs_mask & 0xff)
20695 cfun->machine->lr_save_eliminated = 0;
20700 thumb1_expand_epilogue (void)
20702 HOST_WIDE_INT amount;
20703 arm_stack_offsets *offsets;
20706 /* Naked functions don't have epilogues. */
20707 if (IS_NAKED (arm_current_func_type ()))
20710 offsets = arm_get_frame_offsets ();
20711 amount = offsets->outgoing_args - offsets->saved_regs;
20713 if (frame_pointer_needed)
20715 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20716 amount = offsets->locals_base - offsets->saved_regs;
20718 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20720 gcc_assert (amount >= 0);
20724 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20725 GEN_INT (amount)));
20728 /* r3 is always free in the epilogue. */
20729 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20731 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20732 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20736 /* Emit a USE (stack_pointer_rtx), so that
20737 the stack adjustment will not be deleted. */
20738 emit_insn (gen_prologue_use (stack_pointer_rtx));
20740 if (crtl->profile || !TARGET_SCHED_PROLOG)
20741 emit_insn (gen_blockage ());
20743 /* Emit a clobber for each insn that will be restored in the epilogue,
20744 so that flow2 will get register lifetimes correct. */
20745 for (regno = 0; regno < 13; regno++)
20746 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20747 emit_clobber (gen_rtx_REG (SImode, regno));
20749 if (! df_regs_ever_live_p (LR_REGNUM))
20750 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20754 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20756 arm_stack_offsets *offsets;
20757 unsigned long live_regs_mask = 0;
20758 unsigned long l_mask;
20759 unsigned high_regs_pushed = 0;
20760 int cfa_offset = 0;
20763 if (IS_NAKED (arm_current_func_type ()))
20766 if (is_called_in_ARM_mode (current_function_decl))
20770 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20771 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20773 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20775 /* Generate code sequence to switch us into Thumb mode. */
20776 /* The .code 32 directive has already been emitted by
20777 ASM_DECLARE_FUNCTION_NAME. */
20778 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20779 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20781 /* Generate a label, so that the debugger will notice the
20782 change in instruction sets. This label is also used by
20783 the assembler to bypass the ARM code when this function
20784 is called from a Thumb encoded function elsewhere in the
20785 same file. Hence the definition of STUB_NAME here must
20786 agree with the definition in gas/config/tc-arm.c. */
20788 #define STUB_NAME ".real_start_of"
20790 fprintf (f, "\t.code\t16\n");
20792 if (arm_dllexport_name_p (name))
20793 name = arm_strip_name_encoding (name);
20795 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20796 fprintf (f, "\t.thumb_func\n");
20797 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20800 if (crtl->args.pretend_args_size)
20802 /* Output unwind directive for the stack adjustment. */
20803 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20804 fprintf (f, "\t.pad #%d\n",
20805 crtl->args.pretend_args_size);
20807 if (cfun->machine->uses_anonymous_args)
20811 fprintf (f, "\tpush\t{");
20813 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20815 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20816 regno <= LAST_ARG_REGNUM;
20818 asm_fprintf (f, "%r%s", regno,
20819 regno == LAST_ARG_REGNUM ? "" : ", ");
20821 fprintf (f, "}\n");
20824 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20825 SP_REGNUM, SP_REGNUM,
20826 crtl->args.pretend_args_size);
20828 /* We don't need to record the stores for unwinding (would it
20829 help the debugger any if we did?), but record the change in
20830 the stack pointer. */
20831 if (dwarf2out_do_frame ())
20833 char *l = dwarf2out_cfi_label (false);
20835 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20836 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20840 /* Get the registers we are going to push. */
20841 offsets = arm_get_frame_offsets ();
20842 live_regs_mask = offsets->saved_regs_mask;
20843 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20844 l_mask = live_regs_mask & 0x40ff;
20845 /* Then count how many other high registers will need to be pushed. */
20846 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20848 if (TARGET_BACKTRACE)
20851 unsigned work_register;
20853 /* We have been asked to create a stack backtrace structure.
20854 The code looks like this:
20858 0 sub SP, #16 Reserve space for 4 registers.
20859 2 push {R7} Push low registers.
20860 4 add R7, SP, #20 Get the stack pointer before the push.
20861 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20862 8 mov R7, PC Get hold of the start of this code plus 12.
20863 10 str R7, [SP, #16] Store it.
20864 12 mov R7, FP Get hold of the current frame pointer.
20865 14 str R7, [SP, #4] Store it.
20866 16 mov R7, LR Get hold of the current return address.
20867 18 str R7, [SP, #12] Store it.
20868 20 add R7, SP, #16 Point at the start of the backtrace structure.
20869 22 mov FP, R7 Put this value into the frame pointer. */
20871 work_register = thumb_find_work_register (live_regs_mask);
20873 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20874 asm_fprintf (f, "\t.pad #16\n");
20877 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20878 SP_REGNUM, SP_REGNUM);
20880 if (dwarf2out_do_frame ())
20882 char *l = dwarf2out_cfi_label (false);
20884 cfa_offset = cfa_offset + 16;
20885 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20890 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20891 offset = bit_count (l_mask) * UNITS_PER_WORD;
20896 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20897 offset + 16 + crtl->args.pretend_args_size);
20899 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20902 /* Make sure that the instruction fetching the PC is in the right place
20903 to calculate "start of backtrace creation code + 12". */
20906 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20907 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20909 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20910 ARM_HARD_FRAME_POINTER_REGNUM);
20911 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20916 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20917 ARM_HARD_FRAME_POINTER_REGNUM);
20918 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20920 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20921 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20925 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20926 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20928 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20930 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20931 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20933 /* Optimization: If we are not pushing any low registers but we are going
20934 to push some high registers then delay our first push. This will just
20935 be a push of LR and we can combine it with the push of the first high register. */
20937 else if ((l_mask & 0xff) != 0
20938 || (high_regs_pushed == 0 && l_mask))
20940 unsigned long mask = l_mask;
20941 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20942 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20945 if (high_regs_pushed)
20947 unsigned pushable_regs;
20948 unsigned next_hi_reg;
20950 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20951 if (live_regs_mask & (1 << next_hi_reg))
20954 pushable_regs = l_mask & 0xff;
20956 if (pushable_regs == 0)
20957 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20959 while (high_regs_pushed > 0)
20961 unsigned long real_regs_mask = 0;
20963 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20965 if (pushable_regs & (1 << regno))
20967 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20969 high_regs_pushed --;
20970 real_regs_mask |= (1 << next_hi_reg);
20972 if (high_regs_pushed)
20974 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20976 if (live_regs_mask & (1 << next_hi_reg))
20981 pushable_regs &= ~((1 << regno) - 1);
20987 /* If we had to find a work register and we have not yet
20988 saved the LR then add it to the list of regs to push. */
20989 if (l_mask == (1 << LR_REGNUM))
20991 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20993 real_regs_mask | (1 << LR_REGNUM));
20997 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21002 /* Handle the case of a double word load into a low register from
21003 a computed memory address. The computed address may involve a
21004 register which is overwritten by the load. */
21006 thumb_load_double_from_address (rtx *operands)
21014 gcc_assert (GET_CODE (operands[0]) == REG);
21015 gcc_assert (GET_CODE (operands[1]) == MEM);
21017 /* Get the memory address. */
21018 addr = XEXP (operands[1], 0);
21020 /* Work out how the memory address is computed. */
21021 switch (GET_CODE (addr))
21024 operands[2] = adjust_address (operands[1], SImode, 4);
21026 if (REGNO (operands[0]) == REGNO (addr))
21028 output_asm_insn ("ldr\t%H0, %2", operands);
21029 output_asm_insn ("ldr\t%0, %1", operands);
21033 output_asm_insn ("ldr\t%0, %1", operands);
21034 output_asm_insn ("ldr\t%H0, %2", operands);
21039 /* Compute <address> + 4 for the high order load. */
21040 operands[2] = adjust_address (operands[1], SImode, 4);
21042 output_asm_insn ("ldr\t%0, %1", operands);
21043 output_asm_insn ("ldr\t%H0, %2", operands);
21047 arg1 = XEXP (addr, 0);
21048 arg2 = XEXP (addr, 1);
21050 if (CONSTANT_P (arg1))
21051 base = arg2, offset = arg1;
21053 base = arg1, offset = arg2;
21055 gcc_assert (GET_CODE (base) == REG);
21057 /* Catch the case of <address> = <reg> + <reg> */
21058 if (GET_CODE (offset) == REG)
21060 int reg_offset = REGNO (offset);
21061 int reg_base = REGNO (base);
21062 int reg_dest = REGNO (operands[0]);
21064 /* Add the base and offset registers together into the
21065 higher destination register. */
21066 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21067 reg_dest + 1, reg_base, reg_offset);
21069 /* Load the lower destination register from the address in
21070 the higher destination register. */
21071 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21072 reg_dest, reg_dest + 1);
21074 /* Load the higher destination register from its own address plus 4. */
21076 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21077 reg_dest + 1, reg_dest + 1);
21081 /* Compute <address> + 4 for the high order load. */
21082 operands[2] = adjust_address (operands[1], SImode, 4);
21084 /* If the computed address is held in the low order register
21085 then load the high order register first, otherwise always
21086 load the low order register first. */
21087 if (REGNO (operands[0]) == REGNO (base))
21089 output_asm_insn ("ldr\t%H0, %2", operands);
21090 output_asm_insn ("ldr\t%0, %1", operands);
21094 output_asm_insn ("ldr\t%0, %1", operands);
21095 output_asm_insn ("ldr\t%H0, %2", operands);
21101 /* With no registers to worry about we can just load the value directly. */
21103 operands[2] = adjust_address (operands[1], SImode, 4);
21105 output_asm_insn ("ldr\t%H0, %2", operands);
21106 output_asm_insn ("ldr\t%0, %1", operands);
21110 gcc_unreachable ();
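/* Illustrative: loading a DImode value at [r0 + r1] into r2/r3 goes
   through the REG+REG case above as
     add r3, r0, r1
     ldr r2, [r3, #0]
     ldr r3, [r3, #4]
   so the computed address survives in the high half of the destination
   until the final load overwrites it. */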
21117 thumb_output_move_mem_multiple (int n, rtx *operands)
21124 if (REGNO (operands[4]) > REGNO (operands[5]))
21127 operands[4] = operands[5];
21130 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21131 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21135 if (REGNO (operands[4]) > REGNO (operands[5]))
21138 operands[4] = operands[5];
21141 if (REGNO (operands[5]) > REGNO (operands[6]))
21144 operands[5] = operands[6];
21147 if (REGNO (operands[4]) > REGNO (operands[5]))
21150 operands[4] = operands[5];
21154 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21155 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21159 gcc_unreachable ();
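/* The register swaps above keep each ldmia/stmia register list in
   ascending order, as the Thumb multiple load/store encodings require:
   e.g. if operands 4 and 5 arrive as r5, r4 they are swapped so the
   output is "ldmia r1!, {r4, r5}". */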
21165 /* Output a call-via instruction for thumb state. */
21167 thumb_call_via_reg (rtx reg)
21169 int regno = REGNO (reg);
21172 gcc_assert (regno < LR_REGNUM);
21174 /* If we are in the normal text section we can use a single instance
21175 per compilation unit. If we are doing function sections, then we need
21176 an entry per section, since we can't rely on reachability. */
21177 if (in_section == text_section)
21179 thumb_call_reg_needed = 1;
21181 if (thumb_call_via_label[regno] == NULL)
21182 thumb_call_via_label[regno] = gen_label_rtx ();
21183 labelp = thumb_call_via_label + regno;
21187 if (cfun->machine->call_via[regno] == NULL)
21188 cfun->machine->call_via[regno] = gen_label_rtx ();
21189 labelp = cfun->machine->call_via + regno;
21192 output_asm_insn ("bl\t%a0", labelp);
21196 /* Routines for generating rtl. */
21198 thumb_expand_movmemqi (rtx *operands)
21200 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21201 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21202 HOST_WIDE_INT len = INTVAL (operands[2]);
21203 HOST_WIDE_INT offset = 0;
21207 emit_insn (gen_movmem12b (out, in, out, in));
21213 emit_insn (gen_movmem8b (out, in, out, in));
21219 rtx reg = gen_reg_rtx (SImode);
21220 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21221 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21228 rtx reg = gen_reg_rtx (HImode);
21229 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21230 plus_constant (in, offset))));
21231 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21239 rtx reg = gen_reg_rtx (QImode);
21240 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21241 plus_constant (in, offset))));
21242 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21248 thumb_reload_out_hi (rtx *operands)
21250 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21253 /* Handle reading a half-word from memory during reload. */
21255 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21257 gcc_unreachable ();
21260 /* Return the length of a function name prefix
21261 that starts with the character 'c'. */
21263 arm_get_strip_length (int c)
21267 ARM_NAME_ENCODING_LENGTHS
21272 /* Return a pointer to a function's name with any
21273 and all prefix encodings stripped from it. */
21275 arm_strip_name_encoding (const char *name)
21279 while ((skip = arm_get_strip_length (* name)))
21285 /* If there is a '*' anywhere in the name's prefix, then
21286 emit the stripped name verbatim, otherwise prepend an
21287 underscore if leading underscores are being used. */
21289 arm_asm_output_labelref (FILE *stream, const char *name)
21294 while ((skip = arm_get_strip_length (* name)))
21296 verbatim |= (*name == '*');
21301 fputs (name, stream);
21303 asm_fprintf (stream, "%U%s", name);
21307 arm_file_start (void)
21311 if (TARGET_UNIFIED_ASM)
21312 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21316 const char *fpu_name;
21317 if (arm_selected_arch)
21318 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21320 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21322 if (TARGET_SOFT_FLOAT)
21325 fpu_name = "softvfp";
21327 fpu_name = "softfpa";
21331 fpu_name = arm_fpu_desc->name;
21332 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21334 if (TARGET_HARD_FLOAT)
21335 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21336 if (TARGET_HARD_FLOAT_ABI)
21337 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21340 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21342 /* Some of these attributes only apply when the corresponding features
21343 are used. However we don't have any easy way of figuring this out.
21344 Conservatively record the setting that would have been used. */
21346 /* Tag_ABI_FP_rounding. */
21347 if (flag_rounding_math)
21348 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21349 if (!flag_unsafe_math_optimizations)
21351 /* Tag_ABI_FP_denormal. */
21352 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21353 /* Tag_ABI_FP_exceptions. */
21354 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21356 /* Tag_ABI_FP_user_exceptions. */
21357 if (flag_signaling_nans)
21358 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21359 /* Tag_ABI_FP_number_model. */
21360 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21361 flag_finite_math_only ? 1 : 3);
21363 /* Tag_ABI_align8_needed. */
21364 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21365 /* Tag_ABI_align8_preserved. */
21366 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21367 /* Tag_ABI_enum_size. */
21368 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21369 flag_short_enums ? 1 : 2);
21371 /* Tag_ABI_optimization_goals. */
21374 else if (optimize >= 2)
21380 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21382 /* Tag_ABI_FP_16bit_format. */
21383 if (arm_fp16_format)
21384 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21385 (int)arm_fp16_format);
21387 if (arm_lang_output_object_attributes_hook)
21388 arm_lang_output_object_attributes_hook();
21390 default_file_start();
21394 arm_file_end (void)
21398 if (NEED_INDICATE_EXEC_STACK)
21399 /* Add .note.GNU-stack. */
21400 file_end_indicate_exec_stack ();
21402 if (! thumb_call_reg_needed)
21405 switch_to_section (text_section);
21406 asm_fprintf (asm_out_file, "\t.code 16\n");
21407 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21409 for (regno = 0; regno < LR_REGNUM; regno++)
21411 rtx label = thumb_call_via_label[regno];
21415 targetm.asm_out.internal_label (asm_out_file, "L",
21416 CODE_LABEL_NUMBER (label));
21417 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21423 /* Symbols in the text segment can be accessed without indirecting via the
21424 constant pool; it may take an extra binary operation, but this is still
21425 faster than indirecting via memory. Don't do this when not optimizing,
21426 since we won't be calculating all of the offsets necessary to do this correctly. */
21430 arm_encode_section_info (tree decl, rtx rtl, int first)
21432 if (optimize > 0 && TREE_CONSTANT (decl))
21433 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21435 default_encode_section_info (decl, rtl, first);
21437 #endif /* !ARM_PE */
21440 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21442 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21443 && !strcmp (prefix, "L"))
21445 arm_ccfsm_state = 0;
21446 arm_target_insn = NULL;
21448 default_internal_label (stream, prefix, labelno);
21451 /* Output code to add DELTA to the first argument, and then jump
21452 to FUNCTION. Used for C++ multiple inheritance. */
21454 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21455 HOST_WIDE_INT delta,
21456 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21459 static int thunk_label = 0;
21462 int mi_delta = delta;
21463 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21465 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21468 mi_delta = - mi_delta;
21472 int labelno = thunk_label++;
21473 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21474 /* Thunks are entered in ARM mode when available. */
21475 if (TARGET_THUMB1_ONLY)
21477 /* push r3 so we can use it as a temporary. */
21478 /* TODO: Omit this save if r3 is not used. */
21479 fputs ("\tpush {r3}\n", file);
21480 fputs ("\tldr\tr3, ", file);
21484 fputs ("\tldr\tr12, ", file);
21486 assemble_name (file, label);
21487 fputc ('\n', file);
21490 /* If we are generating PIC, the ldr instruction below loads
21491 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21492 the address of the add + 8, so we have:
21494 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) = target + 1.
21497 Note that we have "+ 1" because some versions of GNU ld
21498 don't set the low bit of the result for R_ARM_REL32
21499 relocations against thumb function symbols.
21500 On ARMv6M this is +4, not +8. */
21501 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21502 assemble_name (file, labelpc);
21503 fputs (":\n", file);
21504 if (TARGET_THUMB1_ONLY)
21506 /* This is 2 insns after the start of the thunk, so we know it
21507 is 4-byte aligned. */
21508 fputs ("\tadd\tr3, pc, r3\n", file);
21509 fputs ("\tmov r12, r3\n", file);
21512 fputs ("\tadd\tr12, pc, r12\n", file);
21514 else if (TARGET_THUMB1_ONLY)
21515 fputs ("\tmov r12, r3\n", file);
21517 if (TARGET_THUMB1_ONLY)
21519 if (mi_delta > 255)
21521 fputs ("\tldr\tr3, ", file);
21522 assemble_name (file, label);
21523 fputs ("+4\n", file);
21524 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21525 mi_op, this_regno, this_regno);
21527 else if (mi_delta != 0)
21529 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21530 mi_op, this_regno, this_regno,
21536 /* TODO: Use movw/movt for large constants when available. */
21537 while (mi_delta != 0)
21539 if ((mi_delta & (3 << shift)) == 0)
21543 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21544 mi_op, this_regno, this_regno,
21545 mi_delta & (0xff << shift));
21546 mi_delta &= ~(0xff << shift);
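/* Worked example (illustrative): mi_delta = 0x1234 is split into
   #0x234 and then #0x1000, emitted as two add (or sub) instructions;
   each chunk is an 8-bit value at an even bit position, so it fits an
   ARM modified-immediate operand. */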
21553 if (TARGET_THUMB1_ONLY)
21554 fputs ("\tpop\t{r3}\n", file);
21556 fprintf (file, "\tbx\tr12\n");
21557 ASM_OUTPUT_ALIGN (file, 2);
21558 assemble_name (file, label);
21559 fputs (":\n", file);
21562 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21563 rtx tem = XEXP (DECL_RTL (function), 0);
21564 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21565 tem = gen_rtx_MINUS (GET_MODE (tem),
21567 gen_rtx_SYMBOL_REF (Pmode,
21568 ggc_strdup (labelpc)));
21569 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21572 /* Output ".word .LTHUNKn". */
21573 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21575 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21576 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21580 fputs ("\tb\t", file);
21581 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21582 if (NEED_PLT_RELOC)
21583 fputs ("(PLT)", file);
21584 fputc ('\n', file);
21589 arm_emit_vector_const (FILE *file, rtx x)
21592 const char * pattern;
21594 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21596 switch (GET_MODE (x))
21598 case V2SImode: pattern = "%08x"; break;
21599 case V4HImode: pattern = "%04x"; break;
21600 case V8QImode: pattern = "%02x"; break;
21601 default: gcc_unreachable ();
21604 fprintf (file, "0x");
21605 for (i = CONST_VECTOR_NUNITS (x); i--;)
21609 element = CONST_VECTOR_ELT (x, i);
21610 fprintf (file, pattern, INTVAL (element));
21616 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21617 HFmode constant pool entries are actually loaded with ldr. */
21619 arm_emit_fp16_const (rtx c)
21624 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21625 bits = real_to_target (NULL, &r, HFmode);
21626 if (WORDS_BIG_ENDIAN)
21627 assemble_zeros (2);
21628 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21629 if (!WORDS_BIG_ENDIAN)
21630 assemble_zeros (2);
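/* For example, the HFmode constant 1.0 has the IEEE half-precision
   bit pattern 0x3c00; on a little-endian target the code above emits
   those two bytes first and then two bytes of zero padding, so the
   pool entry still occupies a full word for the ldr that loads it. */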
21634 arm_output_load_gr (rtx *operands)
21641 if (GET_CODE (operands [1]) != MEM
21642 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21643 || GET_CODE (reg = XEXP (sum, 0)) != REG
21644 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21645 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21646 return "wldrw%?\t%0, %1";
21648 /* Fix up an out-of-range load of a GR register. */
21649 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21650 wcgr = operands[0];
21652 output_asm_insn ("ldr%?\t%0, %1", operands);
21654 operands[0] = wcgr;
21656 output_asm_insn ("tmcr%?\t%0, %1", operands);
21657 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21662 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21664 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21665 named arg and all anonymous args onto the stack.
21666 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
21670 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21671 enum machine_mode mode,
21674 int second_time ATTRIBUTE_UNUSED)
21678 cfun->machine->uses_anonymous_args = 1;
21679 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21681 nregs = pcum->aapcs_ncrn;
21682 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21686 nregs = pcum->nregs;
21688 if (nregs < NUM_ARG_REGS)
21689 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
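/* Worked example (assuming the AAPCS variant and no doubleword padding):
   for "int f (int fmt, ...)" the first anonymous argument would start in
   r1, so nregs == 1 and *pretend_size becomes (4 - 1) * UNITS_PER_WORD
   == 12, causing the prologue to push r1-r3 contiguously below any
   stacked arguments.  */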
21692 /* Return nonzero if the CONSUMER instruction (a store) does not need
21693 PRODUCER's value to calculate the address. */
21696 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21698 rtx value = PATTERN (producer);
21699 rtx addr = PATTERN (consumer);
21701 if (GET_CODE (value) == COND_EXEC)
21702 value = COND_EXEC_CODE (value);
21703 if (GET_CODE (value) == PARALLEL)
21704 value = XVECEXP (value, 0, 0);
21705 value = XEXP (value, 0);
21706 if (GET_CODE (addr) == COND_EXEC)
21707 addr = COND_EXEC_CODE (addr);
21708 if (GET_CODE (addr) == PARALLEL)
21709 addr = XVECEXP (addr, 0, 0);
21710 addr = XEXP (addr, 0);
21712 return !reg_overlap_mentioned_p (value, addr);
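/* For instance, given producer (set (reg r1) ...) and consumer
   (set (mem (plus (reg r1) (const_int 4))) (reg r2)), the store address
   mentions r1, so this returns 0; storing r1 itself through an unrelated
   base register would return nonzero.  */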
21715 /* Return nonzero if the CONSUMER instruction (a store) does need
21716 PRODUCER's value to calculate the address. */
21719 arm_early_store_addr_dep (rtx producer, rtx consumer)
21721 return !arm_no_early_store_addr_dep (producer, consumer);
21724 /* Return nonzero if the CONSUMER instruction (a load) does need
21725 PRODUCER's value to calculate the address. */
21728 arm_early_load_addr_dep (rtx producer, rtx consumer)
21730 rtx value = PATTERN (producer);
21731 rtx addr = PATTERN (consumer);
21733 if (GET_CODE (value) == COND_EXEC)
21734 value = COND_EXEC_CODE (value);
21735 if (GET_CODE (value) == PARALLEL)
21736 value = XVECEXP (value, 0, 0);
21737 value = XEXP (value, 0);
21738 if (GET_CODE (addr) == COND_EXEC)
21739 addr = COND_EXEC_CODE (addr);
21740 if (GET_CODE (addr) == PARALLEL)
21741 addr = XVECEXP (addr, 0, 0);
21742 addr = XEXP (addr, 1);
21744 return reg_overlap_mentioned_p (value, addr);
21747 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21748 have an early register shift value or amount dependency on the
21749 result of PRODUCER. */
21752 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21754 rtx value = PATTERN (producer);
21755 rtx op = PATTERN (consumer);
21758 if (GET_CODE (value) == COND_EXEC)
21759 value = COND_EXEC_CODE (value);
21760 if (GET_CODE (value) == PARALLEL)
21761 value = XVECEXP (value, 0, 0);
21762 value = XEXP (value, 0);
21763 if (GET_CODE (op) == COND_EXEC)
21764 op = COND_EXEC_CODE (op);
21765 if (GET_CODE (op) == PARALLEL)
21766 op = XVECEXP (op, 0, 0);
21769 early_op = XEXP (op, 0);
21770 /* This is either an actual independent shift, or a shift applied to
21771 the first operand of another operation. We want the whole shift
21772 operation.  */
21773 if (GET_CODE (early_op) == REG)
21776 return !reg_overlap_mentioned_p (value, early_op);
21779 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21780 have an early register shift value dependency on the result of
21781 PRODUCER.  */
21784 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21786 rtx value = PATTERN (producer);
21787 rtx op = PATTERN (consumer);
21790 if (GET_CODE (value) == COND_EXEC)
21791 value = COND_EXEC_CODE (value);
21792 if (GET_CODE (value) == PARALLEL)
21793 value = XVECEXP (value, 0, 0);
21794 value = XEXP (value, 0);
21795 if (GET_CODE (op) == COND_EXEC)
21796 op = COND_EXEC_CODE (op);
21797 if (GET_CODE (op) == PARALLEL)
21798 op = XVECEXP (op, 0, 0);
21801 early_op = XEXP (op, 0);
21803 /* This is either an actual independent shift, or a shift applied to
21804 the first operand of another operation. We want the value being
21805 shifted, in either case. */
21806 if (GET_CODE (early_op) != REG)
21807 early_op = XEXP (early_op, 0);
21809 return !reg_overlap_mentioned_p (value, early_op);
21812 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21813 have an early register mult dependency on the result of
21814 PRODUCER.  */
21817 arm_no_early_mul_dep (rtx producer, rtx consumer)
21819 rtx value = PATTERN (producer);
21820 rtx op = PATTERN (consumer);
21822 if (GET_CODE (value) == COND_EXEC)
21823 value = COND_EXEC_CODE (value);
21824 if (GET_CODE (value) == PARALLEL)
21825 value = XVECEXP (value, 0, 0);
21826 value = XEXP (value, 0);
21827 if (GET_CODE (op) == COND_EXEC)
21828 op = COND_EXEC_CODE (op);
21829 if (GET_CODE (op) == PARALLEL)
21830 op = XVECEXP (op, 0, 0);
21833 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21835 if (GET_CODE (XEXP (op, 0)) == MULT)
21836 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21838 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21844 /* We can't rely on the caller doing the proper promotion when
21845 using APCS or ATPCS. */
21848 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21850 return !TARGET_AAPCS_BASED;
21853 static enum machine_mode
21854 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21855 enum machine_mode mode,
21856 int *punsignedp ATTRIBUTE_UNUSED,
21857 const_tree fntype ATTRIBUTE_UNUSED,
21858 int for_return ATTRIBUTE_UNUSED)
21860 if (GET_MODE_CLASS (mode) == MODE_INT
21861 && GET_MODE_SIZE (mode) < 4)
21867 /* AAPCS based ABIs use short enums by default. */
21870 arm_default_short_enums (void)
21872 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21876 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21879 arm_align_anon_bitfield (void)
21881 return TARGET_AAPCS_BASED;
21885 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21888 arm_cxx_guard_type (void)
21890 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21893 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21894 has an accumulator dependency on the result of the producer (a
21895 multiplication instruction) and no other dependency on that result. */
21897 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21899 rtx mul = PATTERN (producer);
21900 rtx mac = PATTERN (consumer);
21902 rtx mac_op0, mac_op1, mac_acc;
21904 if (GET_CODE (mul) == COND_EXEC)
21905 mul = COND_EXEC_CODE (mul);
21906 if (GET_CODE (mac) == COND_EXEC)
21907 mac = COND_EXEC_CODE (mac);
21909 /* Check that mul is of the form (set (...) (mult ...))
21910 and mla is of the form (set (...) (plus (mult ...) (...))). */
21911 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21912 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21913 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21916 mul_result = XEXP (mul, 0);
21917 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21918 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21919 mac_acc = XEXP (XEXP (mac, 1), 1);
21921 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21922 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21923 && !reg_overlap_mentioned_p (mul_result, mac_op1));
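/* Example: for "mul r0, r1, r2" followed by "mla r3, r4, r5, r0" the mul
   result feeds only the accumulator operand, so this returns nonzero;
   "mla r3, r0, r5, r0" would fail the multiplier-operand checks and
   return 0.  */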
21927 /* The EABI says test the least significant bit of a guard variable. */
21930 arm_cxx_guard_mask_bit (void)
21932 return TARGET_AAPCS_BASED;
21936 /* The EABI specifies that all array cookies are 8 bytes long. */
21939 arm_get_cookie_size (tree type)
21943 if (!TARGET_AAPCS_BASED)
21944 return default_cxx_get_cookie_size (type);
21946 size = build_int_cst (sizetype, 8);
21951 /* The EABI says that array cookies should also contain the element size. */
21954 arm_cookie_has_size (void)
21956 return TARGET_AAPCS_BASED;
21960 /* The EABI says constructors and destructors should return a pointer to
21961 the object constructed/destroyed. */
21964 arm_cxx_cdtor_returns_this (void)
21966 return TARGET_AAPCS_BASED;
21969 /* The EABI says that an inline function may never be the key
21970 method.  */
21973 arm_cxx_key_method_may_be_inline (void)
21975 return !TARGET_AAPCS_BASED;
21979 arm_cxx_determine_class_data_visibility (tree decl)
21981 if (!TARGET_AAPCS_BASED
21982 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21985 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21986 is exported. However, on systems without dynamic vague linkage,
21987 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21988 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21989 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21991 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21992 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21996 arm_cxx_class_data_always_comdat (void)
21998 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21999 vague linkage if the class has no key function. */
22000 return !TARGET_AAPCS_BASED;
22004 /* The EABI says __aeabi_atexit should be used to register static
22005 destructors.  */
22008 arm_cxx_use_aeabi_atexit (void)
22010 return TARGET_AAPCS_BASED;
22015 arm_set_return_address (rtx source, rtx scratch)
22017 arm_stack_offsets *offsets;
22018 HOST_WIDE_INT delta;
22020 unsigned long saved_regs;
22022 offsets = arm_get_frame_offsets ();
22023 saved_regs = offsets->saved_regs_mask;
22025 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22026 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22029 if (frame_pointer_needed)
22030 addr = plus_constant(hard_frame_pointer_rtx, -4);
22033 /* LR will be the first saved register. */
22034 delta = offsets->outgoing_args - (offsets->frame + 4);
22039 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22040 GEN_INT (delta & ~4095)));
22045 addr = stack_pointer_rtx;
22047 addr = plus_constant (addr, delta);
22049 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22055 thumb_set_return_address (rtx source, rtx scratch)
22057 arm_stack_offsets *offsets;
22058 HOST_WIDE_INT delta;
22059 HOST_WIDE_INT limit;
22062 unsigned long mask;
22066 offsets = arm_get_frame_offsets ();
22067 mask = offsets->saved_regs_mask;
22068 if (mask & (1 << LR_REGNUM))
22071 /* Find the saved regs. */
22072 if (frame_pointer_needed)
22074 delta = offsets->soft_frame - offsets->saved_args;
22075 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22081 delta = offsets->outgoing_args - offsets->saved_args;
22084 /* Allow for the stack frame. */
22085 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22087 /* The link register is always the first saved register. */
22090 /* Construct the address. */
22091 addr = gen_rtx_REG (SImode, reg);
22094 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22095 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22099 addr = plus_constant (addr, delta);
22101 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22104 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22107 /* Implements target hook vector_mode_supported_p. */
22109 arm_vector_mode_supported_p (enum machine_mode mode)
22111 /* Neon also supports V2SImode, etc. listed in the clause below. */
22112 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22113 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22116 if ((TARGET_NEON || TARGET_IWMMXT)
22117 && ((mode == V2SImode)
22118 || (mode == V4HImode)
22119 || (mode == V8QImode)))
22125 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22126 registers when autovectorizing for Neon, at least until multiple vector
22127 widths are supported properly by the middle-end. */
22129 static enum machine_mode
22130 arm_preferred_simd_mode (enum machine_mode mode)
22136 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22138 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22140 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22142 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22144 if (TARGET_NEON_VECTORIZE_QUAD)
22151 if (TARGET_REALLY_IWMMXT)
22167 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22169 We need to define this for LO_REGS on thumb. Otherwise we can end up
22170 using r0-r4 for function arguments and r7 for the stack frame, leaving
22171 too few registers for doubleword arithmetic. */
22174 arm_class_likely_spilled_p (reg_class_t rclass)
22176 if ((TARGET_THUMB && rclass == LO_REGS)
22177 || rclass == CC_REG)
22183 /* Implements target hook small_register_classes_for_mode_p. */
22185 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22187 return TARGET_THUMB1;
22190 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22191 ARM insns and therefore guarantee that the shift count is modulo 256.
22192 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22193 guarantee no particular behavior for out-of-range counts. */
22195 static unsigned HOST_WIDE_INT
22196 arm_shift_truncation_mask (enum machine_mode mode)
22198 return mode == SImode ? 255 : 0;
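/* E.g. for SImode this licenses the middle-end to drop an explicit
   "count & 255" before a register-specified shift, since the core only
   reads the bottom byte of the shift-count register anyway.  */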
22202 /* Map internal gcc register numbers to DWARF2 register numbers. */
22205 arm_dbx_register_number (unsigned int regno)
22210 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22211 compatibility. The EABI defines them as registers 96-103. */
22212 if (IS_FPA_REGNUM (regno))
22213 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22215 if (IS_VFP_REGNUM (regno))
22217 /* See comment in arm_dwarf_register_span. */
22218 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22219 return 64 + regno - FIRST_VFP_REGNUM;
22221 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22224 if (IS_IWMMXT_GR_REGNUM (regno))
22225 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22227 if (IS_IWMMXT_REGNUM (regno))
22228 return 112 + regno - FIRST_IWMMXT_REGNUM;
22230 gcc_unreachable ();
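/* Illustrative mappings under the scheme above: s0 and s1 map to DWARF
   64 and 65 via the legacy single-precision range, while d16 (which has
   no single-precision alias) maps into the EABI D-register range as
   256 + 16 == 272.  */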
22233 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22234 GCC models them as 64 32-bit registers, so we need to describe this to
22235 the DWARF generation code. Other registers can use the default. */
22237 arm_dwarf_register_span (rtx rtl)
22244 regno = REGNO (rtl);
22245 if (!IS_VFP_REGNUM (regno))
22248 /* XXX FIXME: The EABI defines two VFP register ranges:
22249 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22250 256-287: D0-D31
22251 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22252 corresponding D register. Until GDB supports this, we shall use the
22253 legacy encodings. We also use these encodings for D0-D15 for
22254 compatibility with older debuggers. */
22255 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22258 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22259 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22260 regno = (regno - FIRST_VFP_REGNUM) / 2;
22261 for (i = 0; i < nregs; i++)
22262 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22267 #if ARM_UNWIND_INFO
22268 /* Emit unwind directives for a store-multiple instruction or stack pointer
22269 push during alignment.
22270 These should only ever be generated by the function prologue code, so
22271 expect them to have a particular form. */
22274 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22277 HOST_WIDE_INT offset;
22278 HOST_WIDE_INT nregs;
22284 e = XVECEXP (p, 0, 0);
22285 if (GET_CODE (e) != SET)
22288 /* First insn will adjust the stack pointer. */
22289 if (GET_CODE (e) != SET
22290 || GET_CODE (XEXP (e, 0)) != REG
22291 || REGNO (XEXP (e, 0)) != SP_REGNUM
22292 || GET_CODE (XEXP (e, 1)) != PLUS)
22295 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22296 nregs = XVECLEN (p, 0) - 1;
22298 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22301 /* The function prologue may also push pc, but not annotate it as it is
22302 never restored. We turn this into a stack pointer adjustment. */
22303 if (nregs * 4 == offset - 4)
22305 fprintf (asm_out_file, "\t.pad #4\n");
22309 fprintf (asm_out_file, "\t.save {");
22311 else if (IS_VFP_REGNUM (reg))
22314 fprintf (asm_out_file, "\t.vsave {");
22316 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22318 /* FPA registers are done differently. */
22319 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22323 /* Unknown register type. */
22326 /* If the stack increment doesn't match the size of the saved registers,
22327 something has gone horribly wrong. */
22328 if (offset != nregs * reg_size)
22333 /* The remaining insns will describe the stores. */
22334 for (i = 1; i <= nregs; i++)
22336 /* Expect (set (mem <addr>) (reg)).
22337 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22338 e = XVECEXP (p, 0, i);
22339 if (GET_CODE (e) != SET
22340 || GET_CODE (XEXP (e, 0)) != MEM
22341 || GET_CODE (XEXP (e, 1)) != REG)
22344 reg = REGNO (XEXP (e, 1));
22349 fprintf (asm_out_file, ", ");
22350 /* We can't use %r for vfp because we need to use the
22351 double precision register names. */
22352 if (IS_VFP_REGNUM (reg))
22353 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22355 asm_fprintf (asm_out_file, "%r", reg);
22357 #ifdef ENABLE_CHECKING
22358 /* Check that the addresses are consecutive. */
22359 e = XEXP (XEXP (e, 0), 0);
22360 if (GET_CODE (e) == PLUS)
22362 offset += reg_size;
22363 if (GET_CODE (XEXP (e, 0)) != REG
22364 || REGNO (XEXP (e, 0)) != SP_REGNUM
22365 || GET_CODE (XEXP (e, 1)) != CONST_INT
22366 || offset != INTVAL (XEXP (e, 1)))
22370 || GET_CODE (e) != REG
22371 || REGNO (e) != SP_REGNUM)
22375 fprintf (asm_out_file, "}\n");
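/* For example, a prologue "push {r4, r5, lr}" parallel would come through
   here as a 12-byte stack adjustment plus three consecutive stores, and
   emit the single directive ".save {r4, r5, lr}".  */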
22378 /* Emit unwind directives for a SET. */
22381 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22389 switch (GET_CODE (e0))
22392 /* Pushing a single register. */
22393 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22394 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22395 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22398 asm_fprintf (asm_out_file, "\t.save ");
22399 if (IS_VFP_REGNUM (REGNO (e1)))
22400 asm_fprintf(asm_out_file, "{d%d}\n",
22401 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22403 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22407 if (REGNO (e0) == SP_REGNUM)
22409 /* A stack increment. */
22410 if (GET_CODE (e1) != PLUS
22411 || GET_CODE (XEXP (e1, 0)) != REG
22412 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22413 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22416 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22417 -INTVAL (XEXP (e1, 1)));
22419 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22421 HOST_WIDE_INT offset;
22423 if (GET_CODE (e1) == PLUS)
22425 if (GET_CODE (XEXP (e1, 0)) != REG
22426 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22428 reg = REGNO (XEXP (e1, 0));
22429 offset = INTVAL (XEXP (e1, 1));
22430 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22431 HARD_FRAME_POINTER_REGNUM, reg,
22434 else if (GET_CODE (e1) == REG)
22437 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22438 HARD_FRAME_POINTER_REGNUM, reg);
22443 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22445 /* Move from sp to reg. */
22446 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22448 else if (GET_CODE (e1) == PLUS
22449 && GET_CODE (XEXP (e1, 0)) == REG
22450 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22451 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22453 /* Set reg to offset from sp. */
22454 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22455 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22457 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22459 /* Stack pointer save before alignment. */
22461 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22474 /* Emit unwind directives for the given insn. */
22477 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22481 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22484 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22485 && (TREE_NOTHROW (current_function_decl)
22486 || crtl->all_throwers_are_sibcalls))
22489 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22492 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22494 pat = XEXP (pat, 0);
22496 pat = PATTERN (insn);
22498 switch (GET_CODE (pat))
22501 arm_unwind_emit_set (asm_out_file, pat);
22505 /* Store multiple. */
22506 arm_unwind_emit_sequence (asm_out_file, pat);
22515 /* Output a reference from a function exception table to the type_info
22516 object X. The EABI specifies that the symbol should be relocated by
22517 an R_ARM_TARGET2 relocation. */
22520 arm_output_ttype (rtx x)
22522 fputs ("\t.word\t", asm_out_file);
22523 output_addr_const (asm_out_file, x);
22524 /* Use special relocations for symbol references. */
22525 if (GET_CODE (x) != CONST_INT)
22526 fputs ("(TARGET2)", asm_out_file);
22527 fputc ('\n', asm_out_file);
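/* For a catch handler matching int, for example, this emits
   ".word _ZTIi(TARGET2)" (using the Itanium C++ ABI type_info name),
   leaving the R_ARM_TARGET2 relocation for the linker and loader to
   resolve.  */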
22532 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22535 arm_asm_emit_except_personality (rtx personality)
22537 fputs ("\t.personality\t", asm_out_file);
22538 output_addr_const (asm_out_file, personality);
22539 fputc ('\n', asm_out_file);
22542 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22545 arm_asm_init_sections (void)
22547 exception_section = get_unnamed_section (0, output_section_asm_op,
22550 #endif /* ARM_UNWIND_INFO */
22552 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22554 static enum unwind_info_type
22555 arm_except_unwind_info (struct gcc_options *opts)
22557 /* Honor the --enable-sjlj-exceptions configure switch. */
22558 #ifdef CONFIG_SJLJ_EXCEPTIONS
22559 if (CONFIG_SJLJ_EXCEPTIONS)
22563 /* If not using ARM EABI unwind tables... */
22564 if (ARM_UNWIND_INFO)
22566 /* For simplicity elsewhere in this file, indicate that all unwind
22567 info is disabled if we're not emitting unwind tables. */
22568 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22574 /* ... we use sjlj exceptions for backwards compatibility. */
22579 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22580 stack alignment. */
22583 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22585 rtx unspec = SET_SRC (pattern);
22586 gcc_assert (GET_CODE (unspec) == UNSPEC);
22590 case UNSPEC_STACK_ALIGN:
22591 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22592 put anything on the stack, so hopefully it won't matter.
22593 CFA = SP will be correct after alignment. */
22594 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22595 SET_DEST (pattern));
22598 gcc_unreachable ();
22603 /* Output unwind directives for the start/end of a function. */
22606 arm_output_fn_unwind (FILE * f, bool prologue)
22608 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22612 fputs ("\t.fnstart\n", f);
22615 /* If this function will never be unwound, then mark it as such.
22616 The same condition is used in arm_unwind_emit to suppress
22617 the frame annotations. */
22618 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22619 && (TREE_NOTHROW (current_function_decl)
22620 || crtl->all_throwers_are_sibcalls))
22621 fputs("\t.cantunwind\n", f);
22623 fputs ("\t.fnend\n", f);
22628 arm_emit_tls_decoration (FILE *fp, rtx x)
22630 enum tls_reloc reloc;
22633 val = XVECEXP (x, 0, 0);
22634 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22636 output_addr_const (fp, val);
22641 fputs ("(tlsgd)", fp);
22644 fputs ("(tlsldm)", fp);
22647 fputs ("(tlsldo)", fp);
22650 fputs ("(gottpoff)", fp);
22653 fputs ("(tpoff)", fp);
22656 gcc_unreachable ();
22664 fputs (" + (. - ", fp);
22665 output_addr_const (fp, XVECEXP (x, 0, 2));
22667 output_addr_const (fp, XVECEXP (x, 0, 3));
22677 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22680 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22682 gcc_assert (size == 4);
22683 fputs ("\t.word\t", file);
22684 output_addr_const (file, x);
22685 fputs ("(tlsldo)", file);
22688 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22691 arm_output_addr_const_extra (FILE *fp, rtx x)
22693 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22694 return arm_emit_tls_decoration (fp, x);
22695 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22698 int labelno = INTVAL (XVECEXP (x, 0, 0));
22700 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22701 assemble_name_raw (fp, label);
22705 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22707 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22711 output_addr_const (fp, XVECEXP (x, 0, 0));
22715 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22717 output_addr_const (fp, XVECEXP (x, 0, 0));
22721 output_addr_const (fp, XVECEXP (x, 0, 1));
22725 else if (GET_CODE (x) == CONST_VECTOR)
22726 return arm_emit_vector_const (fp, x);
22731 /* Output assembly for a shift instruction.
22732 SET_FLAGS determines how the instruction modifies the condition codes.
22733 0 - Do not set condition codes.
22734 1 - Set condition codes.
22735 2 - Use smallest instruction. */
22737 arm_output_shift(rtx * operands, int set_flags)
22740 static const char flag_chars[3] = {'?', '.', '!'};
22745 c = flag_chars[set_flags];
22746 if (TARGET_UNIFIED_ASM)
22748 shift = shift_op(operands[3], &val);
22752 operands[2] = GEN_INT(val);
22753 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22756 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22759 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22760 output_asm_insn (pattern, operands);
22764 /* Output a Thumb-1 casesi dispatch sequence. */
22766 thumb1_output_casesi (rtx *operands)
22768 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22770 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22772 switch (GET_MODE(diff_vec))
22775 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22776 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22778 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22779 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22781 return "bl\t%___gnu_thumb1_case_si";
22783 gcc_unreachable ();
22787 /* Output a Thumb-2 casesi instruction. */
22789 thumb2_output_casesi (rtx *operands)
22791 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22793 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22795 output_asm_insn ("cmp\t%0, %1", operands);
22796 output_asm_insn ("bhi\t%l3", operands);
22797 switch (GET_MODE(diff_vec))
22800 return "tbb\t[%|pc, %0]";
22802 return "tbh\t[%|pc, %0, lsl #1]";
22806 output_asm_insn ("adr\t%4, %l2", operands);
22807 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22808 output_asm_insn ("add\t%4, %4, %5", operands);
22813 output_asm_insn ("adr\t%4, %l2", operands);
22814 return "ldr\t%|pc, [%4, %0, lsl #2]";
22817 gcc_unreachable ();
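/* Sketch of the QImode dispatch this produces, with operand numbers
   resolved (register choices illustrative):

       cmp   r0, r1          @ index against upper bound
       bhi   .Ldefault       @ out of range -> default label
       tbb   [pc, r0]        @ byte-offset table branch
*/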
22821 /* Most ARM cores are single issue, but some newer ones can dual issue.
22822 The scheduler descriptions rely on this being correct. */
22824 arm_issue_rate (void)
22840 /* A table and a function to perform ARM-specific name mangling for
22841 NEON vector types in order to conform to the AAPCS (see "Procedure
22842 Call Standard for the ARM Architecture", Appendix A). To qualify
22843 for emission with the mangled names defined in that document, a
22844 vector type must not only be of the correct mode but also be
22845 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22848 enum machine_mode mode;
22849 const char *element_type_name;
22850 const char *aapcs_name;
22851 } arm_mangle_map_entry;
22853 static arm_mangle_map_entry arm_mangle_map[] = {
22854 /* 64-bit containerized types. */
22855 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22856 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22857 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22858 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22859 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22860 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22861 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22862 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22863 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22864 /* 128-bit containerized types. */
22865 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22866 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22867 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22868 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22869 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22870 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22871 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22872 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22873 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22874 { VOIDmode, NULL, NULL }
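/* Under this table a NEON vector such as int8x8_t (V8QImode of
   __builtin_neon_qi) mangles as "15__simd64_int8_t", so a C++ function
   "void f (int8x8_t)" becomes _Z1f15__simd64_int8_t rather than using
   the generic vector mangling.  */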
22878 arm_mangle_type (const_tree type)
22880 arm_mangle_map_entry *pos = arm_mangle_map;
22882 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22883 has to be mangled as if it is in the "std" namespace. */
22884 if (TARGET_AAPCS_BASED
22885 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22887 static bool warned;
22888 if (!warned && warn_psabi && !in_system_header)
22891 inform (input_location,
22892 "the mangling of %<va_list%> has changed in GCC 4.4");
22894 return "St9__va_list";
22897 /* Half-precision float. */
22898 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22901 if (TREE_CODE (type) != VECTOR_TYPE)
22904 /* Check the mode of the vector type, and the name of the vector
22905 element type, against the table. */
22906 while (pos->mode != VOIDmode)
22908 tree elt_type = TREE_TYPE (type);
22910 if (pos->mode == TYPE_MODE (type)
22911 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22912 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22913 pos->element_type_name))
22914 return pos->aapcs_name;
22919 /* Use the default mangling for unrecognized (possibly user-defined)
22920 types.  */
22924 /* Order of allocation of core registers for Thumb: this allocation is
22925 written over the corresponding initial entries of the array
22926 initialized with REG_ALLOC_ORDER. We allocate all low registers
22927 first. Saving and restoring a low register is usually cheaper than
22928 using a call-clobbered high register. */
22930 static const int thumb_core_reg_alloc_order[] =
22932 3, 2, 1, 0, 4, 5, 6, 7,
22933 14, 12, 8, 9, 10, 11, 13, 15
22936 /* Adjust register allocation order when compiling for Thumb. */
22939 arm_order_regs_for_local_alloc (void)
22941 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22942 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22944 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22945 sizeof (thumb_core_reg_alloc_order));
22948 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22951 arm_frame_pointer_required (void)
22953 return (cfun->has_nonlocal_label
22954 || SUBTARGET_FRAME_POINTER_REQUIRED
22955 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22958 /* Only thumb1 can't support conditional execution, so return true if
22959 the target is not thumb1. */
22961 arm_have_conditional_execution (void)
22963 return !TARGET_THUMB1;
22966 /* Legitimize a memory reference for sync primitive implemented using
22967 ldrex / strex. We currently force the form of the reference to be
22968 indirect without offset. We do not yet support the indirect offset
22969 addressing supported by some ARM targets for these
22970 instructions.  */
22972 arm_legitimize_sync_memory (rtx memory)
22974 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22975 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22977 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22978 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22979 return legitimate_memory;
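/* E.g. a reference like (mem (plus (reg sp) (const_int 8))) is rewritten
   here to (mem (reg Rt)) with Rt loaded from sp+8 beforehand, since this
   implementation only issues ldrex/strex with a bare register address.  */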
22982 /* An instruction emitter. */
22983 typedef void (* emit_f) (int label, const char *, rtx *);
22985 /* An instruction emitter that emits via the conventional
22986 output_asm_insn. */
22988 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
22990 output_asm_insn (pattern, operands);
22993 /* Count the number of emitted synchronization instructions. */
22994 static unsigned arm_insn_count;
22996 /* An emitter that counts emitted instructions but does not actually
22997 emit instructions into the instruction stream. */
22999 arm_count (int label,
23000 const char *pattern ATTRIBUTE_UNUSED,
23001 rtx *operands ATTRIBUTE_UNUSED)
23007 /* Construct a pattern using conventional output formatting and feed
23008 it to output_asm_insn. Provides a mechanism to construct the
23009 output pattern on the fly. Note the hard limit on the pattern
23010 buffer size.  */
23011 static void ATTRIBUTE_PRINTF_4
23012 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23013 const char *pattern, ...)
23018 va_start (ap, pattern);
23019 vsprintf (buffer, pattern, ap);
23021 emit (label, buffer, operands);
23024 /* Emit the memory barrier instruction, if any, provided by this
23025 target to a specified emitter. */
23027 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23029 if (TARGET_HAVE_DMB)
23031 /* Note we issue a system level barrier. We should consider
23032 issuing an inner shareability zone barrier here instead, i.e.
23033 "DMB ISH".  */
23034 emit (0, "dmb\tsy", operands);
23038 if (TARGET_HAVE_DMB_MCR)
23040 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23044 gcc_unreachable ();
23047 /* Emit the memory barrier instruction, if any, provided by this
23048 target.  */
23050 arm_output_memory_barrier (rtx *operands)
23052 arm_process_output_memory_barrier (arm_emit, operands);
23056 /* Helper to figure out the instruction suffix required on ldrex/strex
23057 for operations on an object of the specified mode. */
23058 static const char *
23059 arm_ldrex_suffix (enum machine_mode mode)
23063 case QImode: return "b";
23064 case HImode: return "h";
23065 case SImode: return "";
23066 case DImode: return "d";
23068 gcc_unreachable ();
23073 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23074 memory MODE.  */
23076 arm_output_ldrex (emit_f emit,
23077 enum machine_mode mode,
23081 const char *suffix = arm_ldrex_suffix (mode);
23084 operands[0] = target;
23085 operands[1] = memory;
23086 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23089 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23090 memory MODE.  */
23092 arm_output_strex (emit_f emit,
23093 enum machine_mode mode,
23099 const char *suffix = arm_ldrex_suffix (mode);
23102 operands[0] = result;
23103 operands[1] = value;
23104 operands[2] = memory;
23105 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23109 /* Helper to emit a two operand instruction. */
23111 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23117 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23120 /* Helper to emit a three operand instruction. */
23122 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23129 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23132 /* Emit a load store exclusive synchronization loop.
23134 do
23135 old_value = [mem]
23136 if old_value != required_value
23137 break;
23138 t1 = sync_op (old_value, new_value)
23139 [mem] = t1, t2 = [0|1]
23140 while ! t2
23142 Note:
23143 t1 == t2 is not permitted
23144 t1 == old_value is permitted
23146 required_value:
23148 RTX register or const_int representing the required old_value for
23149 the modify to continue; if NULL no comparison is performed. */
23151 arm_output_sync_loop (emit_f emit,
23152 enum machine_mode mode,
23155 rtx required_value,
23159 enum attr_sync_op sync_op,
23160 int early_barrier_required)
23164 gcc_assert (t1 != t2);
23166 if (early_barrier_required)
23167 arm_process_output_memory_barrier (emit, NULL);
23169 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23171 arm_output_ldrex (emit, mode, old_value, memory);
23173 if (required_value)
23177 operands[0] = old_value;
23178 operands[1] = required_value;
23179 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23180 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23186 arm_output_op3 (emit, "add", t1, old_value, new_value);
23190 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23194 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23198 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23202 arm_output_op3 (emit,"and", t1, old_value, new_value);
23206 arm_output_op3 (emit, "and", t1, old_value, new_value);
23207 arm_output_op2 (emit, "mvn", t1, t1);
23215 arm_output_strex (emit, mode, "", t2, t1, memory);
23217 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23218 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
23220 arm_process_output_memory_barrier (emit, NULL);
23221 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
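/* Putting the pieces together, a SImode fetch-and-add with both barriers
   might expand to something like (labels and register assignment
   illustrative; the real code uses %= local labels):

       dmb     sy
   1:  ldrex   r0, [r2]        @ old_value = [mem]
       add     r1, r0, r3      @ t1 = old_value + new_value
       strex   r4, r1, [r2]    @ [mem] = t1, t2 = success flag
       teq     r4, #0
       bne     1b
       dmb     sy
*/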
23225 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23228 default_value = operands[index - 1];
23230 return default_value;
23233 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23234 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23236 /* Extract the operands for a synchronization instruction from the
23237 instruction's attributes and emit the instruction. */
23239 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23241 rtx result, memory, required_value, new_value, t1, t2;
23243 enum machine_mode mode;
23244 enum attr_sync_op sync_op;
23246 result = FETCH_SYNC_OPERAND(result, 0);
23247 memory = FETCH_SYNC_OPERAND(memory, 0);
23248 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23249 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23250 t1 = FETCH_SYNC_OPERAND(t1, 0);
23251 t2 = FETCH_SYNC_OPERAND(t2, 0);
23253 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23254 sync_op = get_attr_sync_op (insn);
23255 mode = GET_MODE (memory);
23257 arm_output_sync_loop (emit, mode, result, memory, required_value,
23258 new_value, t1, t2, sync_op, early_barrier);
23261 /* Emit a synchronization instruction loop. */
23263 arm_output_sync_insn (rtx insn, rtx *operands)
23265 arm_process_output_sync_insn (arm_emit, insn, operands);
23269 /* Count the number of machine instructions that will be emitted for a
23270 synchronization instruction. Note that the emitter used does not
23271 emit instructions; it just counts them, being careful not
23272 to count labels. */
23274 arm_sync_loop_insns (rtx insn, rtx *operands)
23276 arm_insn_count = 0;
23277 arm_process_output_sync_insn (arm_count, insn, operands);
23278 return arm_insn_count;
23281 /* Helper to call a target sync instruction generator, dealing with
23282 the variation in operands required by the different generators. */
23284 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23285 rtx memory, rtx required_value, rtx new_value)
23287 switch (generator->op)
23289 case arm_sync_generator_omn:
23290 gcc_assert (! required_value);
23291 return generator->u.omn (old_value, memory, new_value);
23293 case arm_sync_generator_omrn:
23294 gcc_assert (required_value);
23295 return generator->u.omrn (old_value, memory, required_value, new_value);
23301 /* Expand a synchronization loop. The synchronization loop is expanded
23302 as an opaque block of instructions in order to ensure that we do
23303 not subsequently get extraneous memory accesses inserted within the
23304 critical region. The exclusive access property of ldrex/strex is
23305 only guaranteed if there are no intervening memory accesses. */
23307 arm_expand_sync (enum machine_mode mode,
23308 struct arm_sync_generator *generator,
23309 rtx target, rtx memory, rtx required_value, rtx new_value)
23311 if (target == NULL)
23312 target = gen_reg_rtx (mode);
23314 memory = arm_legitimize_sync_memory (memory);
23315 if (mode != SImode)
23317 rtx load_temp = gen_reg_rtx (SImode);
23319 if (required_value)
23320 required_value = convert_modes (SImode, mode, required_value, true);
23322 new_value = convert_modes (SImode, mode, new_value, true);
23323 emit_insn (arm_call_generator (generator, load_temp, memory,
23324 required_value, new_value));
23325 emit_move_insn (target, gen_lowpart (mode, load_temp));
23329 emit_insn (arm_call_generator (generator, target, memory, required_value,
23335 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23337 /* Vectors which aren't in packed structures will not be less aligned than
23338 the natural alignment of their element type, so this is safe. */
23339 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23342 return default_builtin_vector_alignment_reachable (type, is_packed);
23346 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23347 const_tree type, int misalignment,
23350 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23352 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23357 /* If the misalignment is unknown, we should be able to handle the access
23358 so long as it is not to a member of a packed data structure. */
23359 if (misalignment == -1)
23362 /* Return true if the misalignment is a multiple of the natural alignment
23363 of the vector's element type. This is probably always going to be
23364 true in practice, since we've already established that this isn't a
23365 packed access.  */
23366 return ((misalignment % align) == 0);
23369 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23374 arm_conditional_register_usage (void)
23378 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23380 for (regno = FIRST_FPA_REGNUM;
23381 regno <= LAST_FPA_REGNUM; ++regno)
23382 fixed_regs[regno] = call_used_regs[regno] = 1;
23385 if (TARGET_THUMB1 && optimize_size)
23387 /* When optimizing for size on Thumb-1, it's better not
23388 to use the HI regs, because of the overhead of
23389 stacking them.  */
23390 for (regno = FIRST_HI_REGNUM;
23391 regno <= LAST_HI_REGNUM; ++regno)
23392 fixed_regs[regno] = call_used_regs[regno] = 1;
23395 /* The link register can be clobbered by any branch insn,
23396 but we have no way to track that at present, so mark
23397 it as unavailable. */
23399 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23401 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23403 if (TARGET_MAVERICK)
23405 for (regno = FIRST_FPA_REGNUM;
23406 regno <= LAST_FPA_REGNUM; ++ regno)
23407 fixed_regs[regno] = call_used_regs[regno] = 1;
23408 for (regno = FIRST_CIRRUS_FP_REGNUM;
23409 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23411 fixed_regs[regno] = 0;
23412 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23417 /* VFPv3 registers are disabled when earlier VFP
23418 versions are selected due to the definition of
23419 LAST_VFP_REGNUM. */
23420 for (regno = FIRST_VFP_REGNUM;
23421 regno <= LAST_VFP_REGNUM; ++ regno)
23423 fixed_regs[regno] = 0;
23424 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23425 || regno >= FIRST_VFP_REGNUM + 32;
23430 if (TARGET_REALLY_IWMMXT)
23432 regno = FIRST_IWMMXT_GR_REGNUM;
23433 /* The 2002/10/09 revision of the XScale ABI has wCG0
23434 and wCG1 as call-preserved registers. The 2002/11/21
23435 revision changed this so that all wCG registers are
23436 scratch registers. */
23437 for (regno = FIRST_IWMMXT_GR_REGNUM;
23438 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23439 fixed_regs[regno] = 0;
23440 /* The XScale ABI has wR0 - wR9 as scratch registers,
23441 the rest as call-preserved registers. */
23442 for (regno = FIRST_IWMMXT_REGNUM;
23443 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23445 fixed_regs[regno] = 0;
23446 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23450 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23452 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23453 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23455 else if (TARGET_APCS_STACK)
23457 fixed_regs[10] = 1;
23458 call_used_regs[10] = 1;
23460 /* -mcaller-super-interworking reserves r11 for calls to
23461 _interwork_r11_call_via_rN(). Making the register global
23462 is an easy way of ensuring that it remains valid for all
23463 calls.  */
23464 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23465 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23467 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23468 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23469 if (TARGET_CALLER_INTERWORKING)
23470 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23472 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23476 arm_preferred_rename_class (reg_class_t rclass)
23478 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23479 using GENERIC_REGS. During the register rename pass, we prefer LO_REGS,
23480 and code size can be reduced. */
23481 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23487 #include "gt-arm.h"