1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets *arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
72 HOST_WIDE_INT, rtx, rtx, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx, int);
75 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
76 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
77 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
78 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
79 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
80 inline static int thumb1_index_register_rtx_p (rtx, int);
81 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85 static rtx emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
92 static arm_cc get_arm_condition_code (rtx);
93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
94 static rtx is_jump_table (rtx);
95 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 static const char *shift_op (rtx, HOST_WIDE_INT *);
98 static struct machine_function *arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
101 static HOST_WIDE_INT get_jump_table_size (rtx);
102 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
103 static Mnode *add_minipool_forward_ref (Mfix *);
104 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_backward_ref (Mfix *);
106 static void assign_minipool_offsets (Mfix *);
107 static void arm_print_value (FILE *, rtx);
108 static void dump_minipool (rtx);
109 static int arm_barrier_cost (rtx);
110 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
111 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
112 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree);
119 static unsigned long arm_compute_func_type (void);
120 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
147 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
148 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_rtx_costs (rtx, int, int, int *, bool);
153 static int arm_address_cost (rtx, bool);
154 static bool arm_memory_load_p (rtx);
155 static bool arm_cirrus_insn_p (rtx);
156 static void cirrus_reorg (rtx);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx safe_vector_operand (rtx, enum machine_mode);
160 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
161 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
163 static void emit_constant_insn (rtx cond, rtx pattern);
164 static rtx emit_set_insn (rtx, rtx);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
174 static int aapcs_select_return_coproc (const_tree, const_tree);
176 #ifdef OBJECT_FORMAT_ELF
177 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
178 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
181 static void arm_encode_section_info (tree, rtx, int);
184 static void arm_file_end (void);
185 static void arm_file_start (void);
187 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
189 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
190 enum machine_mode, const_tree, bool);
191 static bool arm_promote_prototypes (const_tree);
192 static bool arm_default_short_enums (void);
193 static bool arm_align_anon_bitfield (void);
194 static bool arm_return_in_msb (const_tree);
195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
198 static void arm_unwind_emit (FILE *, rtx);
199 static bool arm_output_ttype (rtx);
200 static void arm_asm_emit_except_personality (rtx);
201 static void arm_asm_init_sections (void);
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
205 static rtx arm_dwarf_register_span (rtx);
207 static tree arm_cxx_guard_type (void);
208 static bool arm_cxx_guard_mask_bit (void);
209 static tree arm_get_cookie_size (tree);
210 static bool arm_cookie_has_size (void);
211 static bool arm_cxx_cdtor_returns_this (void);
212 static bool arm_cxx_key_method_may_be_inline (void);
213 static void arm_cxx_determine_class_data_visibility (tree);
214 static bool arm_cxx_class_data_always_comdat (void);
215 static bool arm_cxx_use_aeabi_atexit (void);
216 static void arm_init_libfuncs (void);
217 static tree arm_build_builtin_va_list (void);
218 static void arm_expand_builtin_va_start (tree, rtx);
219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
220 static void arm_option_override (void);
221 static bool arm_handle_option (size_t, const char *, int);
222 static void arm_target_help (void);
223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
224 static bool arm_cannot_copy_insn_p (rtx);
225 static bool arm_tls_symbol_p (rtx x);
226 static int arm_issue_rate (void);
227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
229 static bool arm_allocate_stack_slots_for_args (void);
230 static const char *arm_invalid_parameter_type (const_tree t);
231 static const char *arm_invalid_return_type (const_tree t);
232 static tree arm_promoted_type (const_tree t);
233 static tree arm_convert_to_type (tree type, tree expr);
234 static bool arm_scalar_mode_supported_p (enum machine_mode);
235 static bool arm_frame_pointer_required (void);
236 static bool arm_can_eliminate (const int, const int);
237 static void arm_asm_trampoline_template (FILE *);
238 static void arm_trampoline_init (rtx, tree, rtx);
239 static rtx arm_trampoline_adjust_address (rtx);
240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
245 static bool arm_class_likely_spilled_p (reg_class_t);
246 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
247 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
251 static void arm_conditional_register_usage (void);
252 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
255 /* Table of machine attributes. */
256 static const struct attribute_spec arm_attribute_table[] =
258 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
259 /* Function calls made to this symbol must be done indirectly, because
260 it may lie outside of the 26 bit addressing range of a normal function
262 { "long_call", 0, 0, false, true, true, NULL },
263 /* Whereas these functions are always known to reside within the 26 bit
265 { "short_call", 0, 0, false, true, true, NULL },
266 /* Specify the procedure call conventions for a function. */
267 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
268 /* Interrupt Service Routines have special prologue and epilogue requirements. */
269 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
270 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
271 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
273 /* ARM/PE has three new attributes:
275 dllexport - for exporting a function/variable that will live in a dll
276 dllimport - for importing a function/variable from a dll
278 Microsoft allows multiple declspecs in one __declspec, separating
279 them with spaces. We do NOT support this. Instead, use __declspec
282 { "dllimport", 0, 0, true, false, false, NULL },
283 { "dllexport", 0, 0, true, false, false, NULL },
284 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
285 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
286 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
287 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
288 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
290 { NULL, 0, 0, false, false, false, NULL }
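/* Illustrative uses of some of these attributes in user source code
   (attribute syntax as documented in the GCC manual; not part of this
   file's logic):

     void far_away (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
     void no_prologue (void) __attribute__ ((naked));
*/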
293 /* Set default optimization options. */
294 static const struct default_options arm_option_optimization_table[] =
296 /* Enable section anchors by default at -O1 or higher. */
297 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
298 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
299 { OPT_LEVELS_NONE, 0, NULL, 0 }
302 /* Initialize the GCC target structure. */
303 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
304 #undef TARGET_MERGE_DECL_ATTRIBUTES
305 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
308 #undef TARGET_LEGITIMIZE_ADDRESS
309 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
311 #undef TARGET_ATTRIBUTE_TABLE
312 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
314 #undef TARGET_ASM_FILE_START
315 #define TARGET_ASM_FILE_START arm_file_start
316 #undef TARGET_ASM_FILE_END
317 #define TARGET_ASM_FILE_END arm_file_end
319 #undef TARGET_ASM_ALIGNED_SI_OP
320 #define TARGET_ASM_ALIGNED_SI_OP NULL
321 #undef TARGET_ASM_INTEGER
322 #define TARGET_ASM_INTEGER arm_assemble_integer
324 #undef TARGET_PRINT_OPERAND
325 #define TARGET_PRINT_OPERAND arm_print_operand
326 #undef TARGET_PRINT_OPERAND_ADDRESS
327 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
328 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
329 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
331 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
332 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
334 #undef TARGET_ASM_FUNCTION_PROLOGUE
335 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
337 #undef TARGET_ASM_FUNCTION_EPILOGUE
338 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
340 #undef TARGET_DEFAULT_TARGET_FLAGS
341 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
342 #undef TARGET_HANDLE_OPTION
343 #define TARGET_HANDLE_OPTION arm_handle_option
345 #define TARGET_HELP arm_target_help
346 #undef TARGET_OPTION_OVERRIDE
347 #define TARGET_OPTION_OVERRIDE arm_option_override
348 #undef TARGET_OPTION_OPTIMIZATION_TABLE
349 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
351 #undef TARGET_COMP_TYPE_ATTRIBUTES
352 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
354 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
355 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
357 #undef TARGET_SCHED_ADJUST_COST
358 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
360 #undef TARGET_ENCODE_SECTION_INFO
362 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
364 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
367 #undef TARGET_STRIP_NAME_ENCODING
368 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
370 #undef TARGET_ASM_INTERNAL_LABEL
371 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
373 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
374 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
376 #undef TARGET_FUNCTION_VALUE
377 #define TARGET_FUNCTION_VALUE arm_function_value
379 #undef TARGET_LIBCALL_VALUE
380 #define TARGET_LIBCALL_VALUE arm_libcall_value
382 #undef TARGET_ASM_OUTPUT_MI_THUNK
383 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
384 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
385 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
387 #undef TARGET_RTX_COSTS
388 #define TARGET_RTX_COSTS arm_rtx_costs
389 #undef TARGET_ADDRESS_COST
390 #define TARGET_ADDRESS_COST arm_address_cost
392 #undef TARGET_SHIFT_TRUNCATION_MASK
393 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
394 #undef TARGET_VECTOR_MODE_SUPPORTED_P
395 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
396 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
397 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
399 #undef TARGET_MACHINE_DEPENDENT_REORG
400 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
402 #undef TARGET_INIT_BUILTINS
403 #define TARGET_INIT_BUILTINS arm_init_builtins
404 #undef TARGET_EXPAND_BUILTIN
405 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
407 #undef TARGET_INIT_LIBFUNCS
408 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
410 #undef TARGET_PROMOTE_FUNCTION_MODE
411 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
414 #undef TARGET_PASS_BY_REFERENCE
415 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
416 #undef TARGET_ARG_PARTIAL_BYTES
417 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
418 #undef TARGET_FUNCTION_ARG
419 #define TARGET_FUNCTION_ARG arm_function_arg
420 #undef TARGET_FUNCTION_ARG_ADVANCE
421 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
422 #undef TARGET_FUNCTION_ARG_BOUNDARY
423 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
428 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
429 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
431 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
432 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
433 #undef TARGET_TRAMPOLINE_INIT
434 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
435 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
436 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
438 #undef TARGET_DEFAULT_SHORT_ENUMS
439 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
441 #undef TARGET_ALIGN_ANON_BITFIELD
442 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
444 #undef TARGET_NARROW_VOLATILE_BITFIELD
445 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
447 #undef TARGET_CXX_GUARD_TYPE
448 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
450 #undef TARGET_CXX_GUARD_MASK_BIT
451 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
453 #undef TARGET_CXX_GET_COOKIE_SIZE
454 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
456 #undef TARGET_CXX_COOKIE_HAS_SIZE
457 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
459 #undef TARGET_CXX_CDTOR_RETURNS_THIS
460 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
462 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
463 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
465 #undef TARGET_CXX_USE_AEABI_ATEXIT
466 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
468 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
469 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
470 arm_cxx_determine_class_data_visibility
472 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
473 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
475 #undef TARGET_RETURN_IN_MSB
476 #define TARGET_RETURN_IN_MSB arm_return_in_msb
478 #undef TARGET_RETURN_IN_MEMORY
479 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
481 #undef TARGET_MUST_PASS_IN_STACK
482 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
485 #undef TARGET_ASM_UNWIND_EMIT
486 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
488 /* EABI unwinding tables use a different format for the typeinfo tables. */
489 #undef TARGET_ASM_TTYPE
490 #define TARGET_ASM_TTYPE arm_output_ttype
492 #undef TARGET_ARM_EABI_UNWINDER
493 #define TARGET_ARM_EABI_UNWINDER true
495 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
496 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
498 #undef TARGET_ASM_INIT_SECTIONS
499 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
500 #endif /* ARM_UNWIND_INFO */
502 #undef TARGET_EXCEPT_UNWIND_INFO
503 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
511 #undef TARGET_CANNOT_COPY_INSN_P
512 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
515 #undef TARGET_HAVE_TLS
516 #define TARGET_HAVE_TLS true
519 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
520 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
522 #undef TARGET_CANNOT_FORCE_CONST_MEM
523 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
525 #undef TARGET_MAX_ANCHOR_OFFSET
526 #define TARGET_MAX_ANCHOR_OFFSET 4095
528 /* The minimum is set such that the total size of the block
529 for a particular anchor is 4088 + 1 + 4095 bytes, which is
530 divisible by eight, ensuring natural spacing of anchors. */
531 #undef TARGET_MIN_ANCHOR_OFFSET
532 #define TARGET_MIN_ANCHOR_OFFSET -4088
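/* Arithmetic check for the comment above: 4088 + 1 + 4095 = 8184
   = 8 * 1023, so the anchor block size is indeed a multiple of
   eight.  */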
534 #undef TARGET_SCHED_ISSUE_RATE
535 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
537 #undef TARGET_MANGLE_TYPE
538 #define TARGET_MANGLE_TYPE arm_mangle_type
540 #undef TARGET_BUILD_BUILTIN_VA_LIST
541 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
542 #undef TARGET_EXPAND_BUILTIN_VA_START
543 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
544 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
545 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
548 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
549 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
552 #undef TARGET_LEGITIMATE_ADDRESS_P
553 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
555 #undef TARGET_INVALID_PARAMETER_TYPE
556 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
558 #undef TARGET_INVALID_RETURN_TYPE
559 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
561 #undef TARGET_PROMOTED_TYPE
562 #define TARGET_PROMOTED_TYPE arm_promoted_type
564 #undef TARGET_CONVERT_TO_TYPE
565 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
567 #undef TARGET_SCALAR_MODE_SUPPORTED_P
568 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
570 #undef TARGET_FRAME_POINTER_REQUIRED
571 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
573 #undef TARGET_CAN_ELIMINATE
574 #define TARGET_CAN_ELIMINATE arm_can_eliminate
576 #undef TARGET_CONDITIONAL_REGISTER_USAGE
577 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
579 #undef TARGET_CLASS_LIKELY_SPILLED_P
580 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
582 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
583 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
584 arm_vector_alignment_reachable
586 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
587 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
588 arm_builtin_support_vector_misalignment
590 #undef TARGET_PREFERRED_RENAME_CLASS
591 #define TARGET_PREFERRED_RENAME_CLASS \
592 arm_preferred_rename_class
594 struct gcc_target targetm = TARGET_INITIALIZER;
596 /* Obstack for minipool constant handling. */
597 static struct obstack minipool_obstack;
598 static char * minipool_startobj;
600 /* The maximum number of insns skipped which
601 will be conditionalised if possible. */
602 static int max_insns_skipped = 5;
604 extern FILE * asm_out_file;
606 /* True if we are currently building a constant table. */
607 int making_const_table;
609 /* The processor for which instructions should be scheduled. */
610 enum processor_type arm_tune = arm_none;
612 /* The current tuning set. */
613 const struct tune_params *current_tune;
615 /* Which floating point hardware to schedule for. */
618 /* Which floating point hardware to use. */
619 const struct arm_fpu_desc *arm_fpu_desc;
621 /* Whether to use floating point hardware. */
622 enum float_abi_type arm_float_abi;
624 /* Which __fp16 format to use. */
625 enum arm_fp16_format_type arm_fp16_format;
627 /* Which ABI to use. */
628 enum arm_abi_type arm_abi;
630 /* Which thread pointer model to use. */
631 enum arm_tp_type target_thread_pointer = TP_AUTO;
633 /* Used to parse -mstructure_size_boundary command line option. */
634 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
636 /* Used for Thumb call_via trampolines. */
637 rtx thumb_call_via_label[14];
638 static int thumb_call_reg_needed;
640 /* Bit values used to identify processor capabilities. */
641 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
642 #define FL_ARCH3M (1 << 1) /* Extended multiply */
643 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
644 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
645 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
646 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
647 #define FL_THUMB (1 << 6) /* Thumb aware */
648 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
649 #define FL_STRONG (1 << 8) /* StrongARM */
650 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
651 #define FL_XSCALE (1 << 10) /* XScale */
652 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
653 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
654 media instructions. */
655 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
656 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
657 Note: ARM6 & 7 derivatives only. */
658 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
659 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
660 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
662 #define FL_DIV (1 << 18) /* Hardware divide. */
663 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
664 #define FL_NEON (1 << 20) /* Neon instructions. */
665 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
667 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
669 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
671 /* Flags that only affect tuning, not available instructions. */
672 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
675 #define FL_FOR_ARCH2 FL_NOTM
676 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
677 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
678 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
679 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
680 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
681 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
682 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
683 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
684 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
685 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
686 #define FL_FOR_ARCH6J FL_FOR_ARCH6
687 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
688 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
689 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
690 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
691 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
692 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
693 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
694 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
695 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
696 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
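/* As an example of how these accumulate, FL_FOR_ARCH5TE expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
   | FL_THUMB.  */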
698 /* The bits in this mask specify which
699 instructions we are allowed to generate. */
700 static unsigned long insn_flags = 0;
702 /* The bits in this mask specify which instruction scheduling options should
704 static unsigned long tune_flags = 0;
706 /* The following are used in the arm.md file as equivalents to bits
707 in the above two flag variables. */
709 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
712 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
715 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
718 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
721 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
724 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
727 /* Nonzero if this chip supports the ARM 6K extensions. */
730 /* Nonzero if this chip supports the ARM 7 extensions. */
733 /* Nonzero if instructions not present in the 'M' profile can be used. */
734 int arm_arch_notm = 0;
736 /* Nonzero if instructions present in ARMv7E-M can be used. */
739 /* Nonzero if this chip can benefit from load scheduling. */
740 int arm_ld_sched = 0;
742 /* Nonzero if this chip is a StrongARM. */
743 int arm_tune_strongarm = 0;
745 /* Nonzero if this chip is a Cirrus variant. */
746 int arm_arch_cirrus = 0;
748 /* Nonzero if this chip supports Intel Wireless MMX technology. */
749 int arm_arch_iwmmxt = 0;
751 /* Nonzero if this chip is an XScale. */
752 int arm_arch_xscale = 0;
754 /* Nonzero if tuning for XScale. */
755 int arm_tune_xscale = 0;
757 /* Nonzero if we want to tune for stores that access the write-buffer.
758 This typically means an ARM6 or ARM7 with MMU or MPU. */
759 int arm_tune_wbuf = 0;
761 /* Nonzero if tuning for Cortex-A9. */
762 int arm_tune_cortex_a9 = 0;
764 /* Nonzero if generating Thumb instructions. */
767 /* Nonzero if generating Thumb-1 instructions. */
770 /* Nonzero if we should define __THUMB_INTERWORK__ in the
772 XXX This is a bit of a hack, it's intended to help work around
773 problems in GLD which doesn't understand that armv5t code is
774 interworking clean. */
775 int arm_cpp_interwork = 0;
777 /* Nonzero if chip supports Thumb 2. */
780 /* Nonzero if chip supports integer division instruction. */
783 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
784 we must report the mode of the memory reference from
785 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
786 enum machine_mode output_memory_reference_mode;
788 /* The register number to be used for the PIC offset register. */
789 unsigned arm_pic_register = INVALID_REGNUM;
791 /* Set to 1 after arm_reorg has started. Reset to zero at the start of
792 the next function. */
793 static int after_arm_reorg = 0;
795 enum arm_pcs arm_pcs_default;
797 /* For an explanation of these variables, see final_prescan_insn below. */
799 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
800 enum arm_cond_code arm_current_cc;
803 int arm_target_label;
804 /* The number of conditionally executed insns, including the current insn. */
805 int arm_condexec_count = 0;
806 /* A bitmask specifying the patterns for the IT block.
807 Zero means do not output an IT block before this insn. */
808 int arm_condexec_mask = 0;
809 /* The number of bits used in arm_condexec_mask. */
810 int arm_condexec_masklen = 0;
812 /* The condition codes of the ARM, and the inverse function. */
813 static const char * const arm_condition_codes[] =
815 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
816 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
819 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
820 int arm_regs_in_sequence[] =
822 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
825 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
826 #define streq(string1, string2) (strcmp (string1, string2) == 0)
828 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
829 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
830 | (1 << PIC_OFFSET_TABLE_REGNUM)))
832 /* Initialization code. */
836 const char *const name;
837 enum processor_type core;
839 const unsigned long flags;
840 const struct tune_params *const tune;
844 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
845 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
850 const struct tune_params arm_slowmul_tune =
852 arm_slowmul_rtx_costs,
855 ARM_PREFETCH_NOT_BENEFICIAL
858 const struct tune_params arm_fastmul_tune =
860 arm_fastmul_rtx_costs,
863 ARM_PREFETCH_NOT_BENEFICIAL
866 const struct tune_params arm_xscale_tune =
868 arm_xscale_rtx_costs,
869 xscale_sched_adjust_cost,
871 ARM_PREFETCH_NOT_BENEFICIAL
874 const struct tune_params arm_9e_tune =
879 ARM_PREFETCH_NOT_BENEFICIAL
882 const struct tune_params arm_cortex_a9_tune =
885 cortex_a9_sched_adjust_cost,
887 ARM_PREFETCH_BENEFICIAL(4,32,32)
890 const struct tune_params arm_fa726te_tune =
893 fa726te_sched_adjust_cost,
895 ARM_PREFETCH_NOT_BENEFICIAL
899 /* Not all of these give usefully different compilation alternatives,
900 but there is no simple way of generalizing them. */
901 static const struct processors all_cores[] =
904 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
905 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
906 #include "arm-cores.def"
908 {NULL, arm_none, NULL, 0, NULL}
911 static const struct processors all_architectures[] =
913 /* ARM Architectures */
914 /* We don't specify tuning costs here as it will be figured out
917 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
918 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
919 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
920 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
921 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
922 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
923 implementations that support it, so we will leave it out for now. */
924 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
925 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
926 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
927 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
928 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
929 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
930 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
931 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
932 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
933 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
934 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
935 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
936 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
937 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
938 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
939 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
940 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
941 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
942 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
943 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
944 {NULL, arm_none, NULL, 0 , NULL}
948 /* These are populated as command-line arguments are processed, or NULL
950 static const struct processors *arm_selected_arch;
951 static const struct processors *arm_selected_cpu;
952 static const struct processors *arm_selected_tune;
954 /* The name of the preprocessor macro to define for this architecture. */
956 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
958 /* Available values for -mfpu=. */
960 static const struct arm_fpu_desc all_fpus[] =
962 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
963 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
964 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
965 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
966 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
967 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
968 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
969 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
970 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
971 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
972 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
973 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
974 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
975 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
976 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
977 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
978 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
979 /* Compatibility aliases. */
980 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
987 enum float_abi_type abi_type;
991 /* Available values for -mfloat-abi=. */
993 static const struct float_abi all_float_abis[] =
995 {"soft", ARM_FLOAT_ABI_SOFT},
996 {"softfp", ARM_FLOAT_ABI_SOFTFP},
997 {"hard", ARM_FLOAT_ABI_HARD}
1004 enum arm_fp16_format_type fp16_format_type;
1008 /* Available values for -mfp16-format=. */
1010 static const struct fp16_format all_fp16_formats[] =
1012 {"none", ARM_FP16_FORMAT_NONE},
1013 {"ieee", ARM_FP16_FORMAT_IEEE},
1014 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1021 enum arm_abi_type abi_type;
1025 /* Available values for -mabi=. */
1027 static const struct abi_name arm_all_abis[] =
1029 {"apcs-gnu", ARM_ABI_APCS},
1030 {"atpcs", ARM_ABI_ATPCS},
1031 {"aapcs", ARM_ABI_AAPCS},
1032 {"iwmmxt", ARM_ABI_IWMMXT},
1033 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1036 /* Supported TLS relocations. */
1046 /* The maximum number of insns to be used when loading a constant. */
1048 arm_constant_limit (bool size_p)
1050 return size_p ? 1 : current_tune->constant_limit;
1053 /* Emit an insn that's a simple single-set. Both the operands must be known
1056 emit_set_insn (rtx x, rtx y)
1058 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
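/* Illustrative use: with R a register rtx already in hand,
   emit_set_insn (R, const0_rtx) emits the single insn
   (set R (const_int 0)).  */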
1061 /* Return the number of bits set in VALUE. */
1063 bit_count (unsigned long value)
1065 unsigned long count = 0;
1070 value &= value - 1; /* Clear the least-significant set bit. */
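/* Worked example: value == 0x29 (binary 101001) takes three trips
   through the loop, 101001 -> 101000 -> 100000 -> 0, so bit_count
   returns 3.  This is Kernighan's clear-lowest-set-bit trick.  */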
1076 /* Set up library functions unique to ARM. */
1079 arm_init_libfuncs (void)
1081 /* There are no special library functions unless we are using the
1086 /* The functions below are described in Section 4 of the "Run-Time
1087 ABI for the ARM architecture", Version 1.0. */
1089 /* Double-precision floating-point arithmetic. Table 2. */
1090 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1091 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1092 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1093 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1094 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1096 /* Double-precision comparisons. Table 3. */
1097 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1098 set_optab_libfunc (ne_optab, DFmode, NULL);
1099 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1100 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1101 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1102 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1103 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1105 /* Single-precision floating-point arithmetic. Table 4. */
1106 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1107 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1108 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1109 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1110 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1112 /* Single-precision comparisons. Table 5. */
1113 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1114 set_optab_libfunc (ne_optab, SFmode, NULL);
1115 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1116 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1117 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1118 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1119 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1121 /* Floating-point to integer conversions. Table 6. */
1122 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1123 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1124 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1125 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1126 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1127 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1128 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1129 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1131 /* Conversions between floating types. Table 7. */
1132 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1133 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1135 /* Integer to floating-point conversions. Table 8. */
1136 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1137 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1138 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1139 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1140 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1141 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1142 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1143 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1145 /* Long long. Table 9. */
1146 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1147 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1148 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1149 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1150 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1151 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1152 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1153 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1155 /* Integer (32/32->32) division. \S 4.3.1. */
1156 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1157 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1159 /* The divmod functions are designed so that they can be used for
1160 plain division, even though they return both the quotient and the
1161 remainder. The quotient is returned in the usual location (i.e.,
1162 r0 for SImode, {r0, r1} for DImode), just as would be expected
1163 for an ordinary division routine. Because the AAPCS calling
1164 conventions specify that all of { r0, r1, r2, r3 } are
1165 call-clobbered registers, there is no need to tell the compiler
1166 explicitly that those registers are clobbered by these
1168 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1169 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1171 /* For SImode division the ABI provides div-without-mod routines,
1172 which are faster. */
1173 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1174 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1176 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1177 divmod libcalls instead. */
1178 set_optab_libfunc (smod_optab, DImode, NULL);
1179 set_optab_libfunc (umod_optab, DImode, NULL);
1180 set_optab_libfunc (smod_optab, SImode, NULL);
1181 set_optab_libfunc (umod_optab, SImode, NULL);
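/* Net effect, illustratively: for C code "q = a / b;" on SImode the
   compiler calls __aeabi_idiv, while "r = a % b;" is expanded via
   __aeabi_idivmod, which (per the run-time ABI) returns the quotient
   in r0 and the remainder in r1.  */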
1183 /* Half-precision float operations. The compiler handles all operations
1184 with NULL libfuncs by converting to SFmode. */
1185 switch (arm_fp16_format)
1187 case ARM_FP16_FORMAT_IEEE:
1188 case ARM_FP16_FORMAT_ALTERNATIVE:
1191 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1192 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1194 : "__gnu_f2h_alternative"));
1195 set_conv_libfunc (sext_optab, SFmode, HFmode,
1196 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1198 : "__gnu_h2f_alternative"));
1201 set_optab_libfunc (add_optab, HFmode, NULL);
1202 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1203 set_optab_libfunc (smul_optab, HFmode, NULL);
1204 set_optab_libfunc (neg_optab, HFmode, NULL);
1205 set_optab_libfunc (sub_optab, HFmode, NULL);
1208 set_optab_libfunc (eq_optab, HFmode, NULL);
1209 set_optab_libfunc (ne_optab, HFmode, NULL);
1210 set_optab_libfunc (lt_optab, HFmode, NULL);
1211 set_optab_libfunc (le_optab, HFmode, NULL);
1212 set_optab_libfunc (ge_optab, HFmode, NULL);
1213 set_optab_libfunc (gt_optab, HFmode, NULL);
1214 set_optab_libfunc (unord_optab, HFmode, NULL);
1221 if (TARGET_AAPCS_BASED)
1222 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1225 /* On AAPCS systems, this is the "struct __va_list". */
1226 static GTY(()) tree va_list_type;
1228 /* Return the type to use as __builtin_va_list. */
1230 arm_build_builtin_va_list (void)
1235 if (!TARGET_AAPCS_BASED)
1236 return std_build_builtin_va_list ();
1238 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1246 The C Library ABI further reinforces this definition in \S
1249 We must follow this definition exactly. The structure tag
1250 name is visible in C++ mangled names, and thus forms a part
1251 of the ABI. The field name may be used by people who
1252 #include <stdarg.h>. */
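/* In C terms, the type built below is (per AAPCS 7.1.4):

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;
*/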
1253 /* Create the type. */
1254 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1255 /* Give it the required name. */
1256 va_list_name = build_decl (BUILTINS_LOCATION,
1258 get_identifier ("__va_list"),
1260 DECL_ARTIFICIAL (va_list_name) = 1;
1261 TYPE_NAME (va_list_type) = va_list_name;
1262 TYPE_STUB_DECL (va_list_type) = va_list_name;
1263 /* Create the __ap field. */
1264 ap_field = build_decl (BUILTINS_LOCATION,
1266 get_identifier ("__ap"),
1268 DECL_ARTIFICIAL (ap_field) = 1;
1269 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1270 TYPE_FIELDS (va_list_type) = ap_field;
1271 /* Compute its layout. */
1272 layout_type (va_list_type);
1274 return va_list_type;
1277 /* Return an expression of type "void *" pointing to the next
1278 available argument in a variable-argument list. VALIST is the
1279 user-level va_list object, of type __builtin_va_list. */
1281 arm_extract_valist_ptr (tree valist)
1283 if (TREE_TYPE (valist) == error_mark_node)
1284 return error_mark_node;
1286 /* On an AAPCS target, the pointer is stored within "struct
1288 if (TARGET_AAPCS_BASED)
1290 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1291 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1292 valist, ap_field, NULL_TREE);
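/* The COMPONENT_REF built above is the tree equivalent of the C
   expression "valist.__ap".  */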
1298 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1300 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1302 valist = arm_extract_valist_ptr (valist);
1303 std_expand_builtin_va_start (valist, nextarg);
1306 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1308 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1311 valist = arm_extract_valist_ptr (valist);
1312 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1315 /* Look up NAME in SEL; DESC names the option switch for error reporting. */
1317 static const struct processors *
1318 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1320 if (!(name && *name))
1323 for (; sel->name != NULL; sel++)
1325 if (streq (name, sel->name))
1329 error ("bad value (%s) for %s switch", name, desc);
1333 /* Implement TARGET_HANDLE_OPTION. */
1336 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1341 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1345 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1348 case OPT_mhard_float:
1349 target_float_abi_name = "hard";
1352 case OPT_msoft_float:
1353 target_float_abi_name = "soft";
1357 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1366 arm_target_help (void)
1369 static int columns = 0;
1372 /* If we have not done so already, obtain the desired maximum width of
1373 the output. Note - this is a duplication of the code at the start of
1374 gcc/opts.c:print_specific_help() - the two copies should probably be
1375 replaced by a single function. */
1380 p = getenv ("COLUMNS");
1383 int value = atoi (p);
1390 /* Use a reasonable default. */
1394 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1396 /* The - 2 is because we know that the last entry in the array is NULL. */
1397 i = ARRAY_SIZE (all_cores) - 2;
1399 printf (" %s", all_cores[i].name);
1400 remaining = columns - (strlen (all_cores[i].name) + 4);
1401 gcc_assert (remaining >= 0);
1405 int len = strlen (all_cores[i].name);
1407 if (remaining > len + 2)
1409 printf (", %s", all_cores[i].name);
1410 remaining -= len + 2;
1416 printf ("\n %s", all_cores[i].name);
1417 remaining = columns - (len + 4);
1421 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1423 i = ARRAY_SIZE (all_architectures) - 2;
1426 printf (" %s", all_architectures[i].name);
1427 remaining = columns - (strlen (all_architectures[i].name) + 4);
1428 gcc_assert (remaining >= 0);
1432 int len = strlen (all_architectures[i].name);
1434 if (remaining > len + 2)
1436 printf (", %s", all_architectures[i].name);
1437 remaining -= len + 2;
1443 printf ("\n %s", all_architectures[i].name);
1444 remaining = columns - (len + 4);
1451 /* Fix up any incompatible options that the user has specified. */
1453 arm_option_override (void)
1457 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1458 SUBTARGET_OVERRIDE_OPTIONS;
1461 if (arm_selected_arch)
1463 if (arm_selected_cpu)
1465 /* Check for conflict between mcpu and march. */
1466 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1468 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1469 arm_selected_cpu->name, arm_selected_arch->name);
1470 /* -march wins for code generation.
1471 -mcpu wins for default tuning. */
1472 if (!arm_selected_tune)
1473 arm_selected_tune = arm_selected_cpu;
1475 arm_selected_cpu = arm_selected_arch;
1479 arm_selected_arch = NULL;
1482 /* Pick a CPU based on the architecture. */
1483 arm_selected_cpu = arm_selected_arch;
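/* Illustrative example of the rules above: "-mcpu=cortex-a8
   -march=armv5te" has conflicting capability flags, so the warning
   fires; code is then generated for ARMv5TE (-march wins) but tuned
   for Cortex-A8 (-mcpu wins for default tuning).  */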
1486 /* If the user did not specify a processor, choose one for them. */
1487 if (!arm_selected_cpu)
1489 const struct processors * sel;
1490 unsigned int sought;
1492 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1493 if (!arm_selected_cpu->name)
1495 #ifdef SUBTARGET_CPU_DEFAULT
1496 /* Use the subtarget default CPU if none was specified by
1498 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1500 /* Default to ARM6. */
1501 if (!arm_selected_cpu->name)
1502 arm_selected_cpu = &all_cores[arm6];
1505 sel = arm_selected_cpu;
1506 insn_flags = sel->flags;
1508 /* Now check to see if the user has specified some command line
1509 switches that require certain abilities from the cpu. */
1512 if (TARGET_INTERWORK || TARGET_THUMB)
1514 sought |= (FL_THUMB | FL_MODE32);
1516 /* There are no ARM processors that support both APCS-26 and
1517 interworking. Therefore we force FL_MODE26 to be removed
1518 from insn_flags here (if it was set), so that the search
1519 below will always be able to find a compatible processor. */
1520 insn_flags &= ~FL_MODE26;
1523 if (sought != 0 && ((sought & insn_flags) != sought))
1525 /* Try to locate a CPU type that supports all of the abilities
1526 of the default CPU, plus the extra abilities requested by
1528 for (sel = all_cores; sel->name != NULL; sel++)
1529 if ((sel->flags & sought) == (sought | insn_flags))
1532 if (sel->name == NULL)
1534 unsigned current_bit_count = 0;
1535 const struct processors * best_fit = NULL;
1537 /* Ideally we would issue an error message here, saying that no
1538 CPU compatible with the default CPU also supports the command
1539 line options specified by the programmer, and that they
1540 therefore ought to use the -mcpu=<name> command line option
1541 to override the default CPU type.
1544 If we cannot find a cpu that has both the
1545 characteristics of the default cpu and the given
1546 command line options, we scan the array again
1547 looking for a best match. */
1548 for (sel = all_cores; sel->name != NULL; sel++)
1549 if ((sel->flags & sought) == sought)
1553 count = bit_count (sel->flags & insn_flags);
1555 if (count >= current_bit_count)
1558 current_bit_count = count;
1562 gcc_assert (best_fit);
1566 arm_selected_cpu = sel;
1570 gcc_assert (arm_selected_cpu);
1571 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1572 if (!arm_selected_tune)
1573 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1575 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1576 insn_flags = arm_selected_cpu->flags;
1578 arm_tune = arm_selected_tune->core;
1579 tune_flags = arm_selected_tune->flags;
1580 current_tune = arm_selected_tune->tune;
1582 if (target_fp16_format_name)
1584 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1586 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1588 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1592 if (i == ARRAY_SIZE (all_fp16_formats))
1593 error ("invalid __fp16 format option: -mfp16-format=%s",
1594 target_fp16_format_name);
1597 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1599 if (target_abi_name)
1601 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1603 if (streq (arm_all_abis[i].name, target_abi_name))
1605 arm_abi = arm_all_abis[i].abi_type;
1609 if (i == ARRAY_SIZE (arm_all_abis))
1610 error ("invalid ABI option: -mabi=%s", target_abi_name);
1613 arm_abi = ARM_DEFAULT_ABI;
1615 /* Make sure that the processor choice does not conflict with any of the
1616 other command line choices. */
1617 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1618 error ("target CPU does not support ARM mode");
1620 /* BPABI targets use linker tricks to allow interworking on cores
1621 without thumb support. */
1622 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1624 warning (0, "target CPU does not support interworking" );
1625 target_flags &= ~MASK_INTERWORK;
1628 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1630 warning (0, "target CPU does not support THUMB instructions");
1631 target_flags &= ~MASK_THUMB;
1634 if (TARGET_APCS_FRAME && TARGET_THUMB)
1636 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1637 target_flags &= ~MASK_APCS_FRAME;
1640 /* Callee super interworking implies thumb interworking. Adding
1641 this to the flags here simplifies the logic elsewhere. */
1642 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1643 target_flags |= MASK_INTERWORK;
1645 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1646 from here where no function is being compiled currently. */
1647 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1648 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1650 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1651 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1653 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1655 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1656 target_flags |= MASK_APCS_FRAME;
1659 if (TARGET_POKE_FUNCTION_NAME)
1660 target_flags |= MASK_APCS_FRAME;
1662 if (TARGET_APCS_REENT && flag_pic)
1663 error ("-fpic and -mapcs-reent are incompatible");
1665 if (TARGET_APCS_REENT)
1666 warning (0, "APCS reentrant code not supported. Ignored");
1668 /* If this target is normally configured to use APCS frames, warn if they
1669 are turned off and debugging is turned on. */
1671 && write_symbols != NO_DEBUG
1672 && !TARGET_APCS_FRAME
1673 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1674 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1676 if (TARGET_APCS_FLOAT)
1677 warning (0, "passing floating point arguments in fp regs not yet supported");
1679 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1680 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1681 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1682 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1683 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1684 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1685 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1686 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1687 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1688 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1689 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1690 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1691 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1692 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1694 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1695 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1696 thumb_code = TARGET_ARM == 0;
1697 thumb1_code = TARGET_THUMB1 != 0;
1698 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1699 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1700 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1701 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1702 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1704 /* If we are not using the default (ARM mode) section anchor offset
1705 ranges, then set the correct ranges now. */
1706 if (TARGET_THUMB1)
1708 /* Thumb-1 LDR instructions cannot have negative offsets.
1709 Permissible positive offset ranges are 5-bit (for byte loads),
1710 6-bit (for halfword loads), or 7-bit (for word loads).
1711 Empirical results suggest a 7-bit anchor range gives the best
1712 overall code size. */
1713 targetm.min_anchor_offset = 0;
1714 targetm.max_anchor_offset = 127;
1716 else if (TARGET_THUMB2)
1718 /* The minimum is set such that the total size of the block
1719 for a particular anchor is 248 + 1 + 4095 bytes, which is
1720 divisible by eight, ensuring natural spacing of anchors. */
1721 targetm.min_anchor_offset = -248;
1722 targetm.max_anchor_offset = 4095;
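/* A quick check of the arithmetic above: 248 + 1 + 4095 = 4344, and
4344 = 8 * 543, so the block size is indeed divisible by eight. */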
1725 /* V5 code we generate is completely interworking capable, so we turn off
1726 TARGET_INTERWORK here to avoid many tests later on. */
1728 /* XXX However, we must pass the right pre-processor defines to CPP
1729 or GLD can get confused. This is a hack. */
1730 if (TARGET_INTERWORK)
1731 arm_cpp_interwork = 1;
1733 if (arm_arch5)
1734 target_flags &= ~MASK_INTERWORK;
1736 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1737 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1739 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1740 error ("iwmmxt abi requires an iwmmxt capable cpu");
1742 if (target_fpu_name == NULL && target_fpe_name != NULL)
1744 if (streq (target_fpe_name, "2"))
1745 target_fpu_name = "fpe2";
1746 else if (streq (target_fpe_name, "3"))
1747 target_fpu_name = "fpe3";
1748 else
1749 error ("invalid floating point emulation option: -mfpe=%s",
1750 target_fpe_name);
1753 if (target_fpu_name == NULL)
1755 #ifdef FPUTYPE_DEFAULT
1756 target_fpu_name = FPUTYPE_DEFAULT;
1757 #else
1758 if (arm_arch_cirrus)
1759 target_fpu_name = "maverick";
1760 else
1761 target_fpu_name = "fpe2";
1762 #endif
1765 arm_fpu_desc = NULL;
1766 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1768 if (streq (all_fpus[i].name, target_fpu_name))
1770 arm_fpu_desc = &all_fpus[i];
1775 if (!arm_fpu_desc)
1777 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1781 switch (arm_fpu_desc->model)
1782 {
1783 case ARM_FP_MODEL_FPA:
1784 if (arm_fpu_desc->rev == 2)
1785 arm_fpu_attr = FPU_FPE2;
1786 else if (arm_fpu_desc->rev == 3)
1787 arm_fpu_attr = FPU_FPE3;
1788 else
1789 arm_fpu_attr = FPU_FPA;
1790 break;
1792 case ARM_FP_MODEL_MAVERICK:
1793 arm_fpu_attr = FPU_MAVERICK;
1794 break;
1796 case ARM_FP_MODEL_VFP:
1797 arm_fpu_attr = FPU_VFP;
1798 break;
1800 default:
1801 gcc_unreachable ();
1802 }
1804 if (target_float_abi_name != NULL)
1806 /* The user specified a FP ABI. */
1807 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1809 if (streq (all_float_abis[i].name, target_float_abi_name))
1811 arm_float_abi = all_float_abis[i].abi_type;
1815 if (i == ARRAY_SIZE (all_float_abis))
1816 error ("invalid floating point abi: -mfloat-abi=%s",
1817 target_float_abi_name);
1819 else
1820 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1822 if (TARGET_AAPCS_BASED
1823 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1824 error ("FPA is unsupported in the AAPCS");
1826 if (TARGET_AAPCS_BASED)
1828 if (TARGET_CALLER_INTERWORKING)
1829 error ("AAPCS does not support -mcaller-super-interworking");
1831 if (TARGET_CALLEE_INTERWORKING)
1832 error ("AAPCS does not support -mcallee-super-interworking");
1835 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1836 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1837 will ever exist. GCC makes no attempt to support this combination. */
1838 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1839 sorry ("iWMMXt and hardware floating point");
1841 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1842 if (TARGET_THUMB2 && TARGET_IWMMXT)
1843 sorry ("Thumb-2 iWMMXt");
1845 /* __fp16 support currently assumes the core has ldrh. */
1846 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1847 sorry ("__fp16 and no ldrh");
1849 /* If soft-float is specified then don't use FPU. */
1850 if (TARGET_SOFT_FLOAT)
1851 arm_fpu_attr = FPU_NONE;
1853 if (TARGET_AAPCS_BASED)
1855 if (arm_abi == ARM_ABI_IWMMXT)
1856 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1857 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1858 && TARGET_HARD_FLOAT
1859 && TARGET_VFP)
1860 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1861 else
1862 arm_pcs_default = ARM_PCS_AAPCS;
1866 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1867 sorry ("-mfloat-abi=hard and VFP");
1869 if (arm_abi == ARM_ABI_APCS)
1870 arm_pcs_default = ARM_PCS_APCS;
1871 else
1872 arm_pcs_default = ARM_PCS_ATPCS;
1875 /* For arm2/3 there is no need to do any scheduling if there is only
1876 a floating point emulator, or we are doing software floating-point. */
1877 if ((TARGET_SOFT_FLOAT
1878 || (TARGET_FPA && arm_fpu_desc->rev))
1879 && (tune_flags & FL_MODE32) == 0)
1880 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1882 if (target_thread_switch)
1884 if (strcmp (target_thread_switch, "soft") == 0)
1885 target_thread_pointer = TP_SOFT;
1886 else if (strcmp (target_thread_switch, "auto") == 0)
1887 target_thread_pointer = TP_AUTO;
1888 else if (strcmp (target_thread_switch, "cp15") == 0)
1889 target_thread_pointer = TP_CP15;
1890 else
1891 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1894 /* Use the cp15 method if it is available. */
1895 if (target_thread_pointer == TP_AUTO)
1897 if (arm_arch6k && !TARGET_THUMB1)
1898 target_thread_pointer = TP_CP15;
1899 else
1900 target_thread_pointer = TP_SOFT;
1903 if (TARGET_HARD_TP && TARGET_THUMB1)
1904 error ("can not use -mtp=cp15 with 16-bit Thumb");
1906 /* Override the default structure alignment for AAPCS ABI. */
1907 if (TARGET_AAPCS_BASED)
1908 arm_structure_size_boundary = 8;
1910 if (structure_size_string != NULL)
1912 int size = strtol (structure_size_string, NULL, 0);
1914 if (size == 8 || size == 32
1915 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1916 arm_structure_size_boundary = size;
1917 else
1918 warning (0, "structure size boundary can only be set to %s",
1919 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1922 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1924 error ("RTP PIC is incompatible with Thumb");
1928 /* If stack checking is disabled, we can use r10 as the PIC register,
1929 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1930 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1932 if (TARGET_VXWORKS_RTP)
1933 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1934 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1937 if (flag_pic && TARGET_VXWORKS_RTP)
1938 arm_pic_register = 9;
1940 if (arm_pic_register_string != NULL)
1942 int pic_register = decode_reg_name (arm_pic_register_string);
1944 if (!flag_pic)
1945 warning (0, "-mpic-register= is useless without -fpic");
1947 /* Prevent the user from choosing an obviously stupid PIC register. */
1948 else if (pic_register < 0 || call_used_regs[pic_register]
1949 || pic_register == HARD_FRAME_POINTER_REGNUM
1950 || pic_register == STACK_POINTER_REGNUM
1951 || pic_register >= PC_REGNUM
1952 || (TARGET_VXWORKS_RTP
1953 && (unsigned int) pic_register != arm_pic_register))
1954 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1956 arm_pic_register = pic_register;
1959 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1960 if (fix_cm3_ldrd == 2)
1962 if (arm_selected_cpu->core == cortexm3)
1963 fix_cm3_ldrd = 1;
1964 else
1965 fix_cm3_ldrd = 0;
1968 if (TARGET_THUMB1 && flag_schedule_insns)
1970 /* Don't warn since it's on by default in -O2. */
1971 flag_schedule_insns = 0;
1976 /* If optimizing for size, bump the number of instructions that we
1977 are prepared to conditionally execute (even on a StrongARM). */
1978 max_insns_skipped = 6;
1982 /* StrongARM has early execution of branches, so a sequence
1983 that is worth skipping is shorter. */
1984 if (arm_tune_strongarm)
1985 max_insns_skipped = 3;
1988 /* Hot/Cold partitioning is not currently supported, since we can't
1989 handle literal pool placement in that case. */
1990 if (flag_reorder_blocks_and_partition)
1992 inform (input_location,
1993 "-freorder-blocks-and-partition not supported on this architecture");
1994 flag_reorder_blocks_and_partition = 0;
1995 flag_reorder_blocks = 1;
1999 /* Hoisting PIC address calculations more aggressively provides a small,
2000 but measurable, size reduction for PIC code. Therefore, we decrease
2001 the bar for unrestricted expression hoisting to the cost of PIC address
2002 calculation, which is 2 instructions. */
2003 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2004 global_options.x_param_values,
2005 global_options_set.x_param_values);
2007 /* ARM EABI defaults to strict volatile bitfields. */
2008 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2009 flag_strict_volatile_bitfields = 1;
2011 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we have
2012 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2013 if (flag_prefetch_loop_arrays < 0
2016 && current_tune->num_prefetch_slots > 0)
2017 flag_prefetch_loop_arrays = 1;
2019 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2020 defaults unless we are tuning for a core we have researched values for. */
2021 if (current_tune->num_prefetch_slots > 0)
2022 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2023 current_tune->num_prefetch_slots,
2024 global_options.x_param_values,
2025 global_options_set.x_param_values);
2026 if (current_tune->l1_cache_line_size >= 0)
2027 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2028 current_tune->l1_cache_line_size,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2033 current_tune->l1_cache_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
2037 /* Register global variables with the garbage collector. */
2038 arm_add_gc_roots ();
2042 arm_add_gc_roots (void)
2044 gcc_obstack_init(&minipool_obstack);
2045 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2048 /* A table of known ARM exception types.
2049 For use with the interrupt function attribute. */
2051 typedef struct
2052 {
2053 const char *const arg;
2054 const unsigned long return_value;
2055 }
2056 isr_attribute_arg;
2058 static const isr_attribute_arg isr_attribute_args [] =
2060 { "IRQ", ARM_FT_ISR },
2061 { "irq", ARM_FT_ISR },
2062 { "FIQ", ARM_FT_FIQ },
2063 { "fiq", ARM_FT_FIQ },
2064 { "ABORT", ARM_FT_ISR },
2065 { "abort", ARM_FT_ISR },
2066 { "ABORT", ARM_FT_ISR },
2067 { "abort", ARM_FT_ISR },
2068 { "UNDEF", ARM_FT_EXCEPTION },
2069 { "undef", ARM_FT_EXCEPTION },
2070 { "SWI", ARM_FT_EXCEPTION },
2071 { "swi", ARM_FT_EXCEPTION },
2072 { NULL, ARM_FT_NORMAL }
2075 /* Returns the (interrupt) function type of the current
2076 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2078 static unsigned long
2079 arm_isr_value (tree argument)
2081 const isr_attribute_arg * ptr;
2082 const char *arg;
2084 if (!arm_arch_notm)
2085 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2087 /* No argument - default to IRQ. */
2088 if (argument == NULL_TREE)
2089 return ARM_FT_ISR;
2091 /* Get the value of the argument. */
2092 if (TREE_VALUE (argument) == NULL_TREE
2093 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2094 return ARM_FT_UNKNOWN;
2096 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2098 /* Check it against the list of known arguments. */
2099 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2100 if (streq (arg, ptr->arg))
2101 return ptr->return_value;
2103 /* An unrecognized interrupt type. */
2104 return ARM_FT_UNKNOWN;
2107 /* Computes the type of the current function. */
2109 static unsigned long
2110 arm_compute_func_type (void)
2112 unsigned long type = ARM_FT_UNKNOWN;
2113 tree a;
2114 tree attr;
2116 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2118 /* Decide if the current function is volatile. Such functions
2119 never return, and many memory cycles can be saved by not storing
2120 register values that will never be needed again. This optimization
2121 was added to speed up context switching in a kernel application. */
2122 if (optimize > 0
2123 && (TREE_NOTHROW (current_function_decl)
2124 || !(flag_unwind_tables
2125 || (flag_exceptions
2126 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2127 && TREE_THIS_VOLATILE (current_function_decl))
2128 type |= ARM_FT_VOLATILE;
2130 if (cfun->static_chain_decl != NULL)
2131 type |= ARM_FT_NESTED;
2133 attr = DECL_ATTRIBUTES (current_function_decl);
2135 a = lookup_attribute ("naked", attr);
2136 if (a != NULL_TREE)
2137 type |= ARM_FT_NAKED;
2139 a = lookup_attribute ("isr", attr);
2140 if (a == NULL_TREE)
2141 a = lookup_attribute ("interrupt", attr);
2143 if (a == NULL_TREE)
2144 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2145 else
2146 type |= arm_isr_value (TREE_VALUE (a));
2148 return type;
2151 /* Returns the type of the current function. */
2154 arm_current_func_type (void)
2156 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2157 cfun->machine->func_type = arm_compute_func_type ();
2159 return cfun->machine->func_type;
2163 arm_allocate_stack_slots_for_args (void)
2165 /* Naked functions should not allocate stack slots for arguments. */
2166 return !IS_NAKED (arm_current_func_type ());
2170 /* Output assembler code for a block containing the constant parts
2171 of a trampoline, leaving space for the variable parts.
2173 On the ARM, (if r8 is the static chain regnum, and remembering that
2174 referencing pc adds an offset of 8) the trampoline looks like:
2175 ldr r8, [pc, #0]
2176 ldr pc, [pc, #0]
2177 .word static chain value
2178 .word function's address
2179 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2182 arm_asm_trampoline_template (FILE *f)
2186 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2187 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2189 else if (TARGET_THUMB2)
2191 /* The Thumb-2 trampoline is similar to the ARM implementation.
2192 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2193 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2194 STATIC_CHAIN_REGNUM, PC_REGNUM);
2195 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2199 ASM_OUTPUT_ALIGN (f, 2);
2200 fprintf (f, "\t.code\t16\n");
2201 fprintf (f, ".Ltrampoline_start:\n");
2202 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2203 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2204 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2205 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2206 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2207 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2209 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2210 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2213 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2216 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2218 rtx fnaddr, mem, a_tramp;
2220 emit_block_move (m_tramp, assemble_trampoline_template (),
2221 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2223 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2224 emit_move_insn (mem, chain_value);
2226 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2227 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2228 emit_move_insn (mem, fnaddr);
2230 a_tramp = XEXP (m_tramp, 0);
2231 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2232 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2233 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
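/* The __clear_cache call above matters because the trampoline is
written through the data side but then executed; without the flush a
stale instruction cache line could still hold whatever previously
occupied those addresses. */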
2236 /* Thumb trampolines should be entered in thumb mode, so set
2237 the bottom bit of the address. */
2240 arm_trampoline_adjust_address (rtx addr)
2243 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2244 NULL, 0, OPTAB_LIB_WIDEN);
2248 /* Return 1 if it is possible to return using a single instruction.
2249 If SIBLING is non-null, this is a test for a return before a sibling
2250 call. SIBLING is the call insn, so we can examine its register usage. */
2253 use_return_insn (int iscond, rtx sibling)
2255 int regno;
2256 unsigned int func_type;
2257 unsigned long saved_int_regs;
2258 unsigned HOST_WIDE_INT stack_adjust;
2259 arm_stack_offsets *offsets;
2261 /* Never use a return instruction before reload has run. */
2262 if (!reload_completed)
2265 func_type = arm_current_func_type ();
2267 /* Naked, volatile and stack alignment functions need special
2268 consideration. */
2269 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2272 /* So do interrupt functions that use the frame pointer and Thumb
2273 interrupt functions. */
2274 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2277 offsets = arm_get_frame_offsets ();
2278 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2280 /* As do variadic functions. */
2281 if (crtl->args.pretend_args_size
2282 || cfun->machine->uses_anonymous_args
2283 /* Or if the function calls __builtin_eh_return () */
2284 || crtl->calls_eh_return
2285 /* Or if the function calls alloca */
2286 || cfun->calls_alloca
2287 /* Or if there is a stack adjustment. However, if the stack pointer
2288 is saved on the stack, we can use a pre-incrementing stack load. */
2289 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2290 && stack_adjust == 4)))
2293 saved_int_regs = offsets->saved_regs_mask;
2295 /* Unfortunately, the insn
2297 ldmib sp, {..., sp, ...}
2299 triggers a bug on most SA-110 based devices, such that the stack
2300 pointer won't be correctly restored if the instruction takes a
2301 page fault. We work around this problem by popping r3 along with
2302 the other registers, since that is never slower than executing
2303 another instruction.
2305 We test for !arm_arch5 here, because code for any architecture
2306 less than this could potentially be run on one of the buggy
2307 chips. */
2308 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2310 /* Validate that r3 is a call-clobbered register (always true in
2311 the default abi) ... */
2312 if (!call_used_regs[3])
2315 /* ... that it isn't being used for a return value ... */
2316 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2319 /* ... or for a tail-call argument ... */
2322 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2324 if (find_regno_fusage (sibling, USE, 3))
2328 /* ... and that there are no call-saved registers in r0-r2
2329 (always true in the default ABI). */
2330 if (saved_int_regs & 0x7)
2334 /* Can't be done if interworking with Thumb, and any registers have been
2335 stacked. */
2336 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2339 /* On StrongARM, conditional returns are expensive if they aren't
2340 taken and multiple registers have been stacked. */
2341 if (iscond && arm_tune_strongarm)
2343 /* Conditional return when just the LR is stored is a simple
2344 conditional-load instruction, that's not expensive. */
2345 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2346 return 0;
2348 if (flag_pic
2349 && arm_pic_register != INVALID_REGNUM
2350 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2354 /* If there are saved registers but the LR isn't saved, then we need
2355 two instructions for the return. */
2356 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2359 /* Can't be done if any of the FPA regs are pushed,
2360 since this also requires an insn. */
2361 if (TARGET_HARD_FLOAT && TARGET_FPA)
2362 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2363 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2366 /* Likewise VFP regs. */
2367 if (TARGET_HARD_FLOAT && TARGET_VFP)
2368 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2369 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2372 if (TARGET_REALLY_IWMMXT)
2373 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2374 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2380 /* Return TRUE if int I is a valid immediate ARM constant. */
2383 const_ok_for_arm (HOST_WIDE_INT i)
2387 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2388 be all zero, or all one. */
2389 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2390 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2391 != ((~(unsigned HOST_WIDE_INT) 0)
2392 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2395 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2397 /* Fast return for 0 and small values. We must do this for zero, since
2398 the code below can't handle that one case. */
2399 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2402 /* Get the number of trailing zeros. */
2403 lowbit = ffs((int) i) - 1;
2405 /* Only even shifts are allowed in ARM mode so round down to the
2406 nearest even number. */
2410 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2415 /* Allow rotated constants in ARM mode. */
2417 && ((i & ~0xc000003f) == 0
2418 || (i & ~0xf000000f) == 0
2419 || (i & ~0xfc000003) == 0))
2426 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2429 if (i == v || i == (v | (v << 8)))
2432 /* Allow repeated pattern 0xXY00XY00. */
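/* Purely as an illustration of the encoding rule the checks above
implement (an ARM-mode immediate is an 8-bit value rotated right by an
even amount), here is a minimal self-contained sketch. It is kept out
of the build, and arm_immediate_sketch is a name invented for the
example, not a function used elsewhere in this file. */
#if 0
static int
arm_immediate_sketch (unsigned int v)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate V left by ROT bits, undoing a rotate-right of ROT and
         recovering the candidate 8-bit payload.  */
      unsigned int payload = rot ? ((v << rot) | (v >> (32 - rot))) : v;

      if ((payload & ~0xffU) == 0)
        return 1;   /* Encodable: fits in 8 bits after rotation.  */
    }
  return 0;
}
#endif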
2442 /* Return true if I is a valid constant for the operation CODE. */
2444 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2446 if (const_ok_for_arm (i))
2470 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2472 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2478 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2482 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
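/* For example, AND with 0xffffff00 is not itself a valid immediate,
but its complement 0xff is, so the test above accepts it; the
operation can then be emitted as a bic with #0xff. */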
2489 /* Emit a sequence of insns to handle a large constant.
2490 CODE is the code of the operation required, it can be any of SET, PLUS,
2491 IOR, AND, XOR, MINUS;
2492 MODE is the mode in which the operation is being performed;
2493 VAL is the integer to operate on;
2494 SOURCE is the other operand (a register, or a null-pointer for SET);
2495 SUBTARGETS means it is safe to create scratch registers if that will
2496 either produce a simpler sequence, or we will want to cse the values.
2497 Return value is the number of insns emitted. */
2499 /* ??? Tweak this for thumb2. */
2501 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2502 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2504 rtx cond;
2506 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2507 cond = COND_EXEC_TEST (PATTERN (insn));
2508 else
2509 cond = NULL_RTX;
2511 if (subtargets || code == SET
2512 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2513 && REGNO (target) != REGNO (source)))
2515 /* After arm_reorg has been called, we can't fix up expensive
2516 constants by pushing them into memory so we must synthesize
2517 them in-line, regardless of the cost. This is only likely to
2518 be more costly on chips that have load delay slots and we are
2519 compiling without running the scheduler (so no splitting
2520 occurred before the final instruction emission).
2522 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2524 if (!after_arm_reorg
2526 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2528 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2533 /* Currently SET is the only monadic value for CODE, all
2534 the rest are dyadic. */
2535 if (TARGET_USE_MOVT)
2536 arm_emit_movpair (target, GEN_INT (val));
2538 emit_set_insn (target, GEN_INT (val));
2544 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2546 if (TARGET_USE_MOVT)
2547 arm_emit_movpair (temp, GEN_INT (val));
2549 emit_set_insn (temp, GEN_INT (val));
2551 /* For MINUS, the value is subtracted from, since we never
2552 have subtraction of a constant. */
2554 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2556 emit_set_insn (target,
2557 gen_rtx_fmt_ee (code, mode, source, temp));
2563 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2564 1);
2567 /* Return the number of instructions required to synthesize the given
2568 constant, if we start emitting them from bit-position I. */
2570 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2572 HOST_WIDE_INT temp1;
2573 int step_size = TARGET_ARM ? 2 : 1;
2574 int num_insns = 0;
2576 gcc_assert (TARGET_ARM || i == 0);
2584 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2589 temp1 = remainder & ((0x0ff << end)
2590 | ((i < end) ? (0xff >> (32 - end)) : 0));
2591 remainder &= ~temp1;
2596 } while (remainder);
2597 return num_insns;
2601 find_best_start (unsigned HOST_WIDE_INT remainder)
2603 int best_consecutive_zeros = 0;
2604 int i;
2605 int best_start = 0;
2607 /* If we aren't targeting ARM, the best place to start is always at
2608 the bottom. */
2609 if (!TARGET_ARM)
2610 return 0;
2612 for (i = 0; i < 32; i += 2)
2614 int consecutive_zeros = 0;
2616 if (!(remainder & (3 << i)))
2618 while ((i < 32) && !(remainder & (3 << i)))
2620 consecutive_zeros += 2;
2623 if (consecutive_zeros > best_consecutive_zeros)
2625 best_consecutive_zeros = consecutive_zeros;
2626 best_start = i - consecutive_zeros;
2632 /* So long as it won't require any more insns to do so, it's
2633 desirable to emit a small constant (in bits 0...9) in the last
2634 insn. This way there is more chance that it can be combined with
2635 a later addressing insn to form a pre-indexed load or store
2636 operation. Consider:
2638 *((volatile int *)0xe0000100) = 1;
2639 *((volatile int *)0xe0000110) = 2;
2641 We want this to wind up as:
2645 str rB, [rA, #0x100]
2647 str rB, [rA, #0x110]
2649 rather than having to synthesize both large constants from scratch.
2651 Therefore, we calculate how many insns would be required to emit
2652 the constant starting from `best_start', and also starting from
2653 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2654 yield a shorter sequence, we may as well use zero. */
2655 if (best_start != 0
2656 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2657 && (count_insns_for_constant (remainder, 0) <=
2658 count_insns_for_constant (remainder, best_start)))
2659 best_start = 0;
2661 return best_start;
2664 /* Emit an instruction with the indicated PATTERN. If COND is
2665 non-NULL, conditionalize the execution of the instruction on COND
2666 being true. */
2669 emit_constant_insn (rtx cond, rtx pattern)
2672 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2673 emit_insn (pattern);
2676 /* As above, but extra parameter GENERATE which, if clear, suppresses
2677 RTL generation. */
2678 /* ??? This needs more work for thumb2. */
2681 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2682 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2687 int final_invert = 0;
2688 int can_negate_initial = 0;
2690 int num_bits_set = 0;
2691 int set_sign_bit_copies = 0;
2692 int clear_sign_bit_copies = 0;
2693 int clear_zero_bit_copies = 0;
2694 int set_zero_bit_copies = 0;
2696 unsigned HOST_WIDE_INT temp1, temp2;
2697 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2698 int step_size = TARGET_ARM ? 2 : 1;
2700 /* Find out which operations are safe for a given CODE. Also do a quick
2701 check for degenerate cases; these can occur when DImode operations
2712 can_negate_initial = 1;
2716 if (remainder == 0xffffffff)
2719 emit_constant_insn (cond,
2720 gen_rtx_SET (VOIDmode, target,
2721 GEN_INT (ARM_SIGN_EXTEND (val))));
2727 if (reload_completed && rtx_equal_p (target, source))
2731 emit_constant_insn (cond,
2732 gen_rtx_SET (VOIDmode, target, source));
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, const0_rtx));
2748 if (remainder == 0xffffffff)
2750 if (reload_completed && rtx_equal_p (target, source))
2753 emit_constant_insn (cond,
2754 gen_rtx_SET (VOIDmode, target, source));
2763 if (reload_completed && rtx_equal_p (target, source))
2766 emit_constant_insn (cond,
2767 gen_rtx_SET (VOIDmode, target, source));
2771 if (remainder == 0xffffffff)
2774 emit_constant_insn (cond,
2775 gen_rtx_SET (VOIDmode, target,
2776 gen_rtx_NOT (mode, source)));
2782 /* We treat MINUS as (val - source), since (source - val) is always
2783 passed as (source + (-val)). */
2787 emit_constant_insn (cond,
2788 gen_rtx_SET (VOIDmode, target,
2789 gen_rtx_NEG (mode, source)));
2792 if (const_ok_for_arm (val))
2795 emit_constant_insn (cond,
2796 gen_rtx_SET (VOIDmode, target,
2797 gen_rtx_MINUS (mode, GEN_INT (val),
2809 /* If we can do it in one insn get out quickly. */
2810 if (const_ok_for_arm (val)
2811 || (can_negate_initial && const_ok_for_arm (-val))
2812 || (can_invert && const_ok_for_arm (~val)))
2815 emit_constant_insn (cond,
2816 gen_rtx_SET (VOIDmode, target,
2818 ? gen_rtx_fmt_ee (code, mode, source,
2824 /* Calculate a few attributes that may be useful for specific
2826 /* Count number of leading zeros. */
2827 for (i = 31; i >= 0; i--)
2829 if ((remainder & (1 << i)) == 0)
2830 clear_sign_bit_copies++;
2835 /* Count number of leading 1's. */
2836 for (i = 31; i >= 0; i--)
2838 if ((remainder & (1 << i)) != 0)
2839 set_sign_bit_copies++;
2844 /* Count number of trailing zeros. */
2845 for (i = 0; i <= 31; i++)
2847 if ((remainder & (1 << i)) == 0)
2848 clear_zero_bit_copies++;
2853 /* Count number of trailing 1's. */
2854 for (i = 0; i <= 31; i++)
2856 if ((remainder & (1 << i)) != 0)
2857 set_zero_bit_copies++;
2865 /* See if we can use movw. */
2866 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2869 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2874 /* See if we can do this by sign_extending a constant that is known
2875 to be negative. This is a good way of doing it, since the shift
2876 may well merge into a subsequent insn. */
2877 if (set_sign_bit_copies > 1)
2879 if (const_ok_for_arm
2880 (temp1 = ARM_SIGN_EXTEND (remainder
2881 << (set_sign_bit_copies - 1))))
2885 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2886 emit_constant_insn (cond,
2887 gen_rtx_SET (VOIDmode, new_src,
2889 emit_constant_insn (cond,
2890 gen_ashrsi3 (target, new_src,
2891 GEN_INT (set_sign_bit_copies - 1)));
2895 /* For an inverted constant, we will need to set the low bits,
2896 these will be shifted out of harm's way. */
2897 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2898 if (const_ok_for_arm (~temp1))
2902 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2903 emit_constant_insn (cond,
2904 gen_rtx_SET (VOIDmode, new_src,
2906 emit_constant_insn (cond,
2907 gen_ashrsi3 (target, new_src,
2908 GEN_INT (set_sign_bit_copies - 1)));
2914 /* See if we can calculate the value as the difference between two
2915 valid immediates. */
2916 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2918 int topshift = clear_sign_bit_copies & ~1;
2920 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2921 & (0xff000000 >> topshift));
2923 /* If temp1 is zero, then that means the 9 most significant
2924 bits of remainder were 1 and we've caused it to overflow.
2925 When topshift is 0 we don't need to do anything since we
2926 can borrow from 'bit 32'. */
2927 if (temp1 == 0 && topshift != 0)
2928 temp1 = 0x80000000 >> (topshift - 1);
2930 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2932 if (const_ok_for_arm (temp2))
2936 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2937 emit_constant_insn (cond,
2938 gen_rtx_SET (VOIDmode, new_src,
2940 emit_constant_insn (cond,
2941 gen_addsi3 (target, new_src,
2949 /* See if we can generate this by setting the bottom (or the top)
2950 16 bits, and then shifting these into the other half of the
2951 word. We only look for the simplest cases, to do more would cost
2952 too much. Be careful, however, not to generate this when the
2953 alternative would take fewer insns. */
2954 if (val & 0xffff0000)
2956 temp1 = remainder & 0xffff0000;
2957 temp2 = remainder & 0x0000ffff;
2959 /* Overlaps outside this range are best done using other methods. */
2960 for (i = 9; i < 24; i++)
2962 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2963 && !const_ok_for_arm (temp2))
2965 rtx new_src = (subtargets
2966 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2968 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2969 source, subtargets, generate);
2977 gen_rtx_ASHIFT (mode, source,
2984 /* Don't duplicate cases already considered. */
2985 for (i = 17; i < 24; i++)
2987 if (((temp1 | (temp1 >> i)) == remainder)
2988 && !const_ok_for_arm (temp1))
2990 rtx new_src = (subtargets
2991 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2993 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2994 source, subtargets, generate);
2999 gen_rtx_SET (VOIDmode, target,
3002 gen_rtx_LSHIFTRT (mode, source,
3013 /* If we have IOR or XOR, and the constant can be loaded in a
3014 single instruction, and we can find a temporary to put it in,
3015 then this can be done in two instructions instead of 3-4. */
3017 /* TARGET can't be NULL if SUBTARGETS is 0 */
3018 || (reload_completed && !reg_mentioned_p (target, source)))
3020 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3024 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3026 emit_constant_insn (cond,
3027 gen_rtx_SET (VOIDmode, sub,
3029 emit_constant_insn (cond,
3030 gen_rtx_SET (VOIDmode, target,
3031 gen_rtx_fmt_ee (code, mode,
3042 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3043 and the remainder 0s for e.g. 0xfff00000)
3044 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3046 This can be done in 2 instructions by using shifts with mov or mvn.
3051 mvn r0, r0, lsr #12 */
3052 if (set_sign_bit_copies > 8
3053 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3057 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3058 rtx shift = GEN_INT (set_sign_bit_copies);
3062 gen_rtx_SET (VOIDmode, sub,
3064 gen_rtx_ASHIFT (mode,
3069 gen_rtx_SET (VOIDmode, target,
3071 gen_rtx_LSHIFTRT (mode, sub,
3078 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3080 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3082 For eg. r0 = r0 | 0xfff
3087 if (set_zero_bit_copies > 8
3088 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3092 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3093 rtx shift = GEN_INT (set_zero_bit_copies);
3097 gen_rtx_SET (VOIDmode, sub,
3099 gen_rtx_LSHIFTRT (mode,
3104 gen_rtx_SET (VOIDmode, target,
3106 gen_rtx_ASHIFT (mode, sub,
3112 /* This will never be reached for Thumb2 because orn is a valid
3113 instruction. This is for Thumb1 and the ARM 32 bit cases.
3115 x = y | constant (such that ~constant is a valid constant)
3117 x = ~(~y & ~constant).
3119 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3123 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3124 emit_constant_insn (cond,
3125 gen_rtx_SET (VOIDmode, sub,
3126 gen_rtx_NOT (mode, source)));
3129 sub = gen_reg_rtx (mode);
3130 emit_constant_insn (cond,
3131 gen_rtx_SET (VOIDmode, sub,
3132 gen_rtx_AND (mode, source,
3134 emit_constant_insn (cond,
3135 gen_rtx_SET (VOIDmode, target,
3136 gen_rtx_NOT (mode, sub)));
3143 /* See if two shifts will do 2 or more insn's worth of work. */
3144 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3146 HOST_WIDE_INT shift_mask = ((0xffffffff
3147 << (32 - clear_sign_bit_copies))
3150 if ((remainder | shift_mask) != 0xffffffff)
3154 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3155 insns = arm_gen_constant (AND, mode, cond,
3156 remainder | shift_mask,
3157 new_src, source, subtargets, 1);
3162 rtx targ = subtargets ? NULL_RTX : target;
3163 insns = arm_gen_constant (AND, mode, cond,
3164 remainder | shift_mask,
3165 targ, source, subtargets, 0);
3171 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3172 rtx shift = GEN_INT (clear_sign_bit_copies);
3174 emit_insn (gen_ashlsi3 (new_src, source, shift));
3175 emit_insn (gen_lshrsi3 (target, new_src, shift));
3181 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3183 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3185 if ((remainder | shift_mask) != 0xffffffff)
3189 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3191 insns = arm_gen_constant (AND, mode, cond,
3192 remainder | shift_mask,
3193 new_src, source, subtargets, 1);
3198 rtx targ = subtargets ? NULL_RTX : target;
3200 insns = arm_gen_constant (AND, mode, cond,
3201 remainder | shift_mask,
3202 targ, source, subtargets, 0);
3208 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3209 rtx shift = GEN_INT (clear_zero_bit_copies);
3211 emit_insn (gen_lshrsi3 (new_src, source, shift));
3212 emit_insn (gen_ashlsi3 (target, new_src, shift));
3224 for (i = 0; i < 32; i++)
3225 if (remainder & (1 << i))
3229 || (code != IOR && can_invert && num_bits_set > 16))
3230 remainder ^= 0xffffffff;
3231 else if (code == PLUS && num_bits_set > 16)
3232 remainder = (-remainder) & 0xffffffff;
3234 /* For XOR, if more than half the bits are set and there's a sequence
3235 of more than 8 consecutive ones in the pattern then we can XOR by the
3236 inverted constant and then invert the final result; this may save an
3237 instruction and might also lead to the final mvn being merged with
3238 some other operation. */
3239 else if (code == XOR && num_bits_set > 16
3240 && (count_insns_for_constant (remainder ^ 0xffffffff,
3242 (remainder ^ 0xffffffff))
3243 < count_insns_for_constant (remainder,
3244 find_best_start (remainder))))
3246 remainder ^= 0xffffffff;
3255 /* Now try and find a way of doing the job in either two or three
3256 instructions.
3257 We start by looking for the largest block of zeros that are aligned on
3258 a 2-bit boundary, we then fill up the temps, wrapping around to the
3259 top of the word when we drop off the bottom.
3260 In the worst case this code should produce no more than four insns.
3261 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3262 best place to start. */
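/* A worked example, illustrative only: in ARM mode the constant
0x00ff00ff splits into the two valid rotated immediates 0x000000ff
and 0x00ff0000, so for code == SET the loop below emits

mov rd, #0x000000ff
orr rd, rd, #0x00ff0000

rather than synthesizing the value byte by byte. */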
3264 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3265 the same. */
3267 /* Now start emitting the insns. */
3268 i = find_best_start (remainder);
3275 if (remainder & (3 << (i - 2)))
3280 temp1 = remainder & ((0x0ff << end)
3281 | ((i < end) ? (0xff >> (32 - end)) : 0));
3282 remainder &= ~temp1;
3286 rtx new_src, temp1_rtx;
3288 if (code == SET || code == MINUS)
3290 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3291 if (can_invert && code != MINUS)
3296 if ((final_invert || remainder) && subtargets)
3297 new_src = gen_reg_rtx (mode);
3302 else if (can_negate)
3306 temp1 = trunc_int_for_mode (temp1, mode);
3307 temp1_rtx = GEN_INT (temp1);
3311 else if (code == MINUS)
3312 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3314 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3316 emit_constant_insn (cond,
3317 gen_rtx_SET (VOIDmode, new_src,
3327 else if (code == MINUS)
3333 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3343 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3344 gen_rtx_NOT (mode, source)));
3351 /* Canonicalize a comparison so that we are more likely to recognize it.
3352 This can be done for a few constant compares, where we can make the
3353 immediate value easier to load. */
3356 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3358 enum machine_mode mode;
3359 unsigned HOST_WIDE_INT i, maxval;
3361 mode = GET_MODE (*op0);
3362 if (mode == VOIDmode)
3363 mode = GET_MODE (*op1);
3365 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3367 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3368 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3369 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3370 for GTU/LEU in Thumb mode. */
3375 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3376 available. */
3377 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3380 if (code == GT || code == LE
3381 || (!TARGET_ARM && (code == GTU || code == LEU)))
3383 /* Missing comparison. First try to use an available
3384 comparison. */
3385 if (GET_CODE (*op1) == CONST_INT)
3393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3395 *op1 = GEN_INT (i + 1);
3396 return code == GT ? GE : LT;
3401 if (i != ~((unsigned HOST_WIDE_INT) 0)
3402 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3404 *op1 = GEN_INT (i + 1);
3405 return code == GTU ? GEU : LTU;
3413 /* If that did not work, reverse the condition. */
3417 return swap_condition (code);
3423 /* Comparisons smaller than DImode. Only adjust comparisons against
3424 an out-of-range constant. */
3425 if (GET_CODE (*op1) != CONST_INT
3426 || const_ok_for_arm (INTVAL (*op1))
3427 || const_ok_for_arm (- INTVAL (*op1)))
3441 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3443 *op1 = GEN_INT (i + 1);
3444 return code == GT ? GE : LT;
3451 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3453 *op1 = GEN_INT (i - 1);
3454 return code == GE ? GT : LE;
3460 if (i != ~((unsigned HOST_WIDE_INT) 0)
3461 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3463 *op1 = GEN_INT (i + 1);
3464 return code == GTU ? GEU : LTU;
3471 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3473 *op1 = GEN_INT (i - 1);
3474 return code == GEU ? GTU : LEU;
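/* As a concrete instance of the rewrites above: (x <= 0xffff) would
need the invalid immediate 0xffff, but adjusting to (x < 0x10000)
needs only 0x10000, a single rotated immediate, so LE against i
becomes LT against i + 1. */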
3486 /* Define how to find the value returned by a function. */
3489 arm_function_value(const_tree type, const_tree func,
3490 bool outgoing ATTRIBUTE_UNUSED)
3492 enum machine_mode mode;
3493 int unsignedp ATTRIBUTE_UNUSED;
3494 rtx r ATTRIBUTE_UNUSED;
3496 mode = TYPE_MODE (type);
3498 if (TARGET_AAPCS_BASED)
3499 return aapcs_allocate_return_reg (mode, type, func);
3501 /* Promote integer types. */
3502 if (INTEGRAL_TYPE_P (type))
3503 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3505 /* Promote small structs returned in a register to full-word size
3506 for big-endian AAPCS. */
3507 if (arm_return_in_msb (type))
3509 HOST_WIDE_INT size = int_size_in_bytes (type);
3510 if (size % UNITS_PER_WORD != 0)
3512 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3513 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3517 return LIBCALL_VALUE (mode);
3521 libcall_eq (const void *p1, const void *p2)
3523 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3527 libcall_hash (const void *p1)
3529 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3533 add_libcall (htab_t htab, rtx libcall)
3535 *htab_find_slot (htab, libcall, INSERT) = libcall;
3539 arm_libcall_uses_aapcs_base (const_rtx libcall)
3541 static bool init_done = false;
3542 static htab_t libcall_htab;
3548 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3550 add_libcall (libcall_htab,
3551 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3552 add_libcall (libcall_htab,
3553 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3554 add_libcall (libcall_htab,
3555 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3556 add_libcall (libcall_htab,
3557 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3559 add_libcall (libcall_htab,
3560 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3561 add_libcall (libcall_htab,
3562 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3568 add_libcall (libcall_htab,
3569 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3570 add_libcall (libcall_htab,
3571 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3572 add_libcall (libcall_htab,
3573 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3574 add_libcall (libcall_htab,
3575 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3576 add_libcall (libcall_htab,
3577 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3578 add_libcall (libcall_htab,
3579 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3582 return libcall && htab_find (libcall_htab, libcall) != NULL;
3586 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3588 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3589 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3591 /* The following libcalls return their result in integer registers,
3592 even though they return a floating point value. */
3593 if (arm_libcall_uses_aapcs_base (libcall))
3594 return gen_rtx_REG (mode, ARG_REGISTER(1));
3598 return LIBCALL_VALUE (mode);
3601 /* Determine the amount of memory needed to store the possible return
3602 registers of an untyped call. */
3604 arm_apply_result_size (void)
3610 if (TARGET_HARD_FLOAT_ABI)
3616 if (TARGET_MAVERICK)
3619 if (TARGET_IWMMXT_ABI)
3626 /* Decide whether TYPE should be returned in memory (true)
3627 or in a register (false). FNTYPE is the type of the function making
3630 arm_return_in_memory (const_tree type, const_tree fntype)
3634 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3636 if (TARGET_AAPCS_BASED)
3638 /* Simple, non-aggregate types (i.e. not including vectors and
3639 complex) are always returned in a register (or registers).
3640 We don't care about which register here, so we can short-cut
3641 some of the detail. */
3642 if (!AGGREGATE_TYPE_P (type)
3643 && TREE_CODE (type) != VECTOR_TYPE
3644 && TREE_CODE (type) != COMPLEX_TYPE)
3647 /* Any return value that is no larger than one word can be
3648 returned in r0. */
3649 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3652 /* Check any available co-processors to see if they accept the
3653 type as a register candidate (VFP, for example, can return
3654 some aggregates in consecutive registers). These aren't
3655 available if the call is variadic. */
3656 if (aapcs_select_return_coproc (type, fntype) >= 0)
3659 /* Vector values should be returned using ARM registers, not
3660 memory (unless they're over 16 bytes, which will break since
3661 we only have four call-clobbered registers to play with). */
3662 if (TREE_CODE (type) == VECTOR_TYPE)
3663 return (size < 0 || size > (4 * UNITS_PER_WORD));
3665 /* The rest go in memory. */
3669 if (TREE_CODE (type) == VECTOR_TYPE)
3670 return (size < 0 || size > (4 * UNITS_PER_WORD));
3672 if (!AGGREGATE_TYPE_P (type) &&
3673 (TREE_CODE (type) != VECTOR_TYPE))
3674 /* All simple types are returned in registers. */
3677 if (arm_abi != ARM_ABI_APCS)
3679 /* ATPCS and later return aggregate types in memory only if they are
3680 larger than a word (or are variable size). */
3681 return (size < 0 || size > UNITS_PER_WORD);
3684 /* For the arm-wince targets we choose to be compatible with Microsoft's
3685 ARM and Thumb compilers, which always return aggregates in memory. */
3687 /* All structures/unions bigger than one word are returned in memory.
3688 Also catch the case where int_size_in_bytes returns -1. In this case
3689 the aggregate is either huge or of variable size, and in either case
3690 we will want to return it via memory and not in a register. */
3691 if (size < 0 || size > UNITS_PER_WORD)
3694 if (TREE_CODE (type) == RECORD_TYPE)
3698 /* For a struct the APCS says that we only return in a register
3699 if the type is 'integer like' and every addressable element
3700 has an offset of zero. For practical purposes this means
3701 that the structure can have at most one non bit-field element
3702 and that this element must be the first one in the structure. */
3704 /* Find the first field, ignoring non-FIELD_DECL things which will
3705 have been created by C++. */
3706 for (field = TYPE_FIELDS (type);
3707 field && TREE_CODE (field) != FIELD_DECL;
3708 field = DECL_CHAIN (field))
3712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3714 /* Check that the first field is valid for returning in a register. */
3716 /* ... Floats are not allowed */
3717 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3720 /* ... Aggregates that are not themselves valid for returning in
3721 a register are not allowed. */
3722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3725 /* Now check the remaining fields, if any. Only bitfields are allowed,
3726 since they are not addressable. */
3727 for (field = DECL_CHAIN (field);
3729 field = DECL_CHAIN (field))
3731 if (TREE_CODE (field) != FIELD_DECL)
3734 if (!DECL_BIT_FIELD_TYPE (field))
3741 if (TREE_CODE (type) == UNION_TYPE)
3745 /* Unions can be returned in registers if every element is
3746 integral, or can be returned in an integer register. */
3747 for (field = TYPE_FIELDS (type);
3749 field = DECL_CHAIN (field))
3751 if (TREE_CODE (field) != FIELD_DECL)
3754 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3757 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3763 #endif /* not ARM_WINCE */
3765 /* Return all other types in memory. */
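/* Two quick examples of the APCS rules above: struct { int i; } is
"integer like" and comes back in a register, while struct { float f; }
fails the FLOAT_TYPE_P check on its first field and is returned in
memory. */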
3769 /* Indicate whether or not words of a double are in big-endian order. */
3772 arm_float_words_big_endian (void)
3774 if (TARGET_MAVERICK)
3777 /* For FPA, float words are always big-endian. For VFP, float words
3778 follow the memory system mode. */
3786 return (TARGET_BIG_END ? 1 : 0);
3791 const struct pcs_attribute_arg
3795 } pcs_attribute_args[] =
3797 {"aapcs", ARM_PCS_AAPCS},
3798 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3800 /* We could recognize these, but changes would be needed elsewhere
3801 * to implement them. */
3802 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3803 {"atpcs", ARM_PCS_ATPCS},
3804 {"apcs", ARM_PCS_APCS},
3806 {NULL, ARM_PCS_UNKNOWN}
3810 arm_pcs_from_attribute (tree attr)
3812 const struct pcs_attribute_arg *ptr;
3815 /* Get the value of the argument. */
3816 if (TREE_VALUE (attr) == NULL_TREE
3817 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3818 return ARM_PCS_UNKNOWN;
3820 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3822 /* Check it against the list of known arguments. */
3823 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3824 if (streq (arg, ptr->arg))
3827 /* An unrecognized PCS variant. */
3828 return ARM_PCS_UNKNOWN;
3831 /* Get the PCS variant to use for this call. TYPE is the function's type
3832 specification, DECL is the specific declaration. DECL may be null if
3833 the call could be indirect or if this is a library call. */
3835 arm_get_pcs_model (const_tree type, const_tree decl)
3837 bool user_convention = false;
3838 enum arm_pcs user_pcs = arm_pcs_default;
3843 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3846 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3847 user_convention = true;
3850 if (TARGET_AAPCS_BASED)
3852 /* Detect varargs functions. These always use the base rules
3853 (no argument is ever a candidate for a co-processor
3854 register). */
3855 bool base_rules = stdarg_p (type);
3857 if (user_convention)
3859 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3860 sorry ("non-AAPCS derived PCS variant");
3861 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3862 error ("variadic functions must use the base AAPCS variant");
3866 return ARM_PCS_AAPCS;
3867 else if (user_convention)
3869 else if (decl && flag_unit_at_a_time)
3871 /* Local functions never leak outside this compilation unit,
3872 so we are free to use whatever conventions are
3873 appropriate. */
3874 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3875 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3877 return ARM_PCS_AAPCS_LOCAL;
3880 else if (user_convention && user_pcs != arm_pcs_default)
3881 sorry ("PCS variant");
3883 /* For everything else we use the target's default. */
3884 return arm_pcs_default;
3889 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3890 const_tree fntype ATTRIBUTE_UNUSED,
3891 rtx libcall ATTRIBUTE_UNUSED,
3892 const_tree fndecl ATTRIBUTE_UNUSED)
3894 /* Record the unallocated VFP registers. */
3895 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3896 pcum->aapcs_vfp_reg_alloc = 0;
3899 /* Walk down the type tree of TYPE counting consecutive base elements.
3900 If *MODEP is VOIDmode, then set it to the first valid floating point
3901 type. If a non-floating point type is found, or if a floating point
3902 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3903 otherwise return the count in the sub-tree. */
3905 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3907 enum machine_mode mode;
3910 switch (TREE_CODE (type))
3913 mode = TYPE_MODE (type);
3914 if (mode != DFmode && mode != SFmode)
3917 if (*modep == VOIDmode)
3918 *modep = mode;
3926 mode = TYPE_MODE (TREE_TYPE (type));
3927 if (mode != DFmode && mode != SFmode)
3930 if (*modep == VOIDmode)
3931 *modep = mode;
3939 /* Use V2SImode and V4SImode as representatives of all 64-bit
3940 and 128-bit vector types, whether or not those modes are
3941 supported with the present options. */
3942 size = int_size_in_bytes (type);
3955 if (*modep == VOIDmode)
3956 *modep = mode;
3958 /* Vector modes are considered to be opaque: two vectors are
3959 equivalent for the purposes of being homogeneous aggregates
3960 if they are the same size. */
3969 tree index = TYPE_DOMAIN (type);
3971 /* Can't handle incomplete types. */
3972 if (!COMPLETE_TYPE_P(type))
3975 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3978 || !TYPE_MAX_VALUE (index)
3979 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3980 || !TYPE_MIN_VALUE (index)
3981 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3985 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3986 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3988 /* There must be no padding. */
3989 if (!host_integerp (TYPE_SIZE (type), 1)
3990 || (tree_low_cst (TYPE_SIZE (type), 1)
3991 != count * GET_MODE_BITSIZE (*modep)))
3992 return -1;
3994 return count;
4003 /* Can't handle incomplete types. */
4004 if (!COMPLETE_TYPE_P(type))
4007 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4009 if (TREE_CODE (field) != FIELD_DECL)
4012 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4018 /* There must be no padding. */
4019 if (!host_integerp (TYPE_SIZE (type), 1)
4020 || (tree_low_cst (TYPE_SIZE (type), 1)
4021 != count * GET_MODE_BITSIZE (*modep)))
4022 return -1;
4024 return count;
4027 case UNION_TYPE:
4028 case QUAL_UNION_TYPE:
4030 /* These aren't very interesting except in a degenerate case. */
4035 /* Can't handle incomplete types. */
4036 if (!COMPLETE_TYPE_P(type))
4039 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4041 if (TREE_CODE (field) != FIELD_DECL)
4044 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4047 count = count > sub_count ? count : sub_count;
4050 /* There must be no padding. */
4051 if (!host_integerp (TYPE_SIZE (type), 1)
4052 || (tree_low_cst (TYPE_SIZE (type), 1)
4053 != count * GET_MODE_BITSIZE (*modep)))
4054 return -1;
4056 return count;
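/* So, for example, struct { double x; double y; } yields a count of 2
with *MODEP set to DFmode (a homogeneous aggregate the VFP variant can
pass in consecutive double registers), whereas
struct { double x; int i; } yields -1 and falls back to the base
rules. Illustrative only. */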
4066 /* Return true if PCS_VARIANT should use VFP registers. */
4068 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4070 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4072 static bool seen_thumb1_vfp = false;
4074 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4076 sorry ("Thumb-1 hard-float VFP ABI");
4077 /* sorry() is not immediately fatal, so only display this once. */
4078 seen_thumb1_vfp = true;
4084 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4085 return false;
4087 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4088 (TARGET_VFP_DOUBLE || !is_double));
4092 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4093 enum machine_mode mode, const_tree type,
4094 enum machine_mode *base_mode, int *count)
4096 enum machine_mode new_mode = VOIDmode;
4098 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4099 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4100 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4102 *count = 1;
4103 new_mode = mode;
4105 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4107 *count = 2;
4108 new_mode = (mode == DCmode ? DFmode : SFmode);
4110 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4112 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4114 if (ag_count > 0 && ag_count <= 4)
4115 *count = ag_count;
4116 else
4117 return false;
4123 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4126 *base_mode = new_mode;
4131 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4132 enum machine_mode mode, const_tree type)
4134 int count ATTRIBUTE_UNUSED;
4135 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4137 if (!use_vfp_abi (pcs_variant, false))
4139 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4140 &ag_mode, &count);
4144 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4147 if (!use_vfp_abi (pcum->pcs_variant, false))
4150 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4151 &pcum->aapcs_vfp_rmode,
4152 &pcum->aapcs_vfp_rcount);
4156 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4157 const_tree type ATTRIBUTE_UNUSED)
4159 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4160 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4163 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4164 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4166 pcum->aapcs_vfp_reg_alloc = mask << regno;
4167 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4170 int rcount = pcum->aapcs_vfp_rcount;
4172 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4176 /* Avoid using unsupported vector modes. */
4177 if (rmode == V2SImode)
4179 else if (rmode == V4SImode)
4186 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4187 for (i = 0; i < rcount; i++)
4189 rtx tmp = gen_rtx_REG (rmode,
4190 FIRST_VFP_REGNUM + regno + i * rshift);
4191 tmp = gen_rtx_EXPR_LIST
4193 GEN_INT (i * GET_MODE_SIZE (rmode)));
4194 XVECEXP (par, 0, i) = tmp;
4197 pcum->aapcs_reg = par;
4200 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4207 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4208 enum machine_mode mode,
4209 const_tree type ATTRIBUTE_UNUSED)
4211 if (!use_vfp_abi (pcs_variant, false))
4214 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4217 enum machine_mode ag_mode;
4222 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4227 if (ag_mode == V2SImode)
4229 else if (ag_mode == V4SImode)
4235 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4236 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4237 for (i = 0; i < count; i++)
4239 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4240 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4241 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4242 XVECEXP (par, 0, i) = tmp;
4248 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4252 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4253 enum machine_mode mode ATTRIBUTE_UNUSED,
4254 const_tree type ATTRIBUTE_UNUSED)
4256 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4257 pcum->aapcs_vfp_reg_alloc = 0;
4261 #define AAPCS_CP(X) \
4263 aapcs_ ## X ## _cum_init, \
4264 aapcs_ ## X ## _is_call_candidate, \
4265 aapcs_ ## X ## _allocate, \
4266 aapcs_ ## X ## _is_return_candidate, \
4267 aapcs_ ## X ## _allocate_return_reg, \
4268 aapcs_ ## X ## _advance \
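/* A sketch (disabled) of one expanded slot initialiser; it assumes the
   VFP slot is built from the aapcs_vfp_* routines above plus a matching
   aapcs_vfp_cum_init hook defined elsewhere in this file.  */
#if 0
  /* AAPCS_CP(vfp) pastes the six hook pointers, in field order: */
  aapcs_vfp_cum_init,
  aapcs_vfp_is_call_candidate,
  aapcs_vfp_allocate,
  aapcs_vfp_is_return_candidate,
  aapcs_vfp_allocate_return_reg,
  aapcs_vfp_advance
#endif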
4271 /* Table of co-processors that can be used to pass arguments in
4272 registers. Ideally no argument should be a candidate for more than
4273 one co-processor table entry, but the table is processed in order
4274 and stops after the first match. If that entry then fails to put
4275 the argument into a co-processor register, the argument will go on
4279 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4280 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4282 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4283 BLKmode) is a candidate for this co-processor's registers; this
4284 function should ignore any position-dependent state in
4285 CUMULATIVE_ARGS and only use call-type dependent information. */
4286 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4288 /* Return true if the argument does get a co-processor register; it
4289 should set aapcs_reg to an RTX of the register allocated as is
4290 required for a return from FUNCTION_ARG. */
4291 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4293 /* Return true if a result of mode MODE (or type TYPE if MODE is
4294 BLKmode) can be returned in this co-processor's registers. */
4295 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4297 /* Allocate and return an RTX element to hold the return type of a
4298 call; this routine must not fail and will only be called if
4299 is_return_candidate returned true with the same parameters. */
4300 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4302 /* Finish processing this argument and prepare to start processing
4304 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4305 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4313 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4318 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4319 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4326 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4328 /* We aren't passed a decl, so we can't check that a call is local.
4329 However, it isn't clear that that would be a win anyway, since it
4330 might limit some tail-calling opportunities. */
4331 enum arm_pcs pcs_variant;
4335 const_tree fndecl = NULL_TREE;
4337 if (TREE_CODE (fntype) == FUNCTION_DECL)
4340 fntype = TREE_TYPE (fntype);
4343 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4346 pcs_variant = arm_pcs_default;
4348 if (pcs_variant != ARM_PCS_AAPCS)
4352 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4353 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4362 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4365 /* We aren't passed a decl, so we can't check that a call is local.
4366 However, it isn't clear that that would be a win anyway, since it
4367 might limit some tail-calling opportunities. */
4368 enum arm_pcs pcs_variant;
4369 int unsignedp ATTRIBUTE_UNUSED;
4373 const_tree fndecl = NULL_TREE;
4375 if (TREE_CODE (fntype) == FUNCTION_DECL)
4378 fntype = TREE_TYPE (fntype);
4381 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4384 pcs_variant = arm_pcs_default;
4386 /* Promote integer types. */
4387 if (type && INTEGRAL_TYPE_P (type))
4388 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4390 if (pcs_variant != ARM_PCS_AAPCS)
4394 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4395 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4397 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4401 /* Promotes small structs returned in a register to full-word size
4402 for big-endian AAPCS. */
4403 if (type && arm_return_in_msb (type))
4405 HOST_WIDE_INT size = int_size_in_bytes (type);
4406 if (size % UNITS_PER_WORD != 0)
4408 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4409 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4413 return gen_rtx_REG (mode, R0_REGNUM);
4417 aapcs_libcall_value (enum machine_mode mode)
4419 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4422 /* Lay out a function argument using the AAPCS rules. The rule
4423 numbers referred to here are those in the AAPCS. */
4425 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4426 const_tree type, bool named)
4431 /* We only need to do this once per argument. */
4432 if (pcum->aapcs_arg_processed)
4435 pcum->aapcs_arg_processed = true;
4437 /* Special case: if named is false then we are handling an incoming
4438 anonymous argument which is on the stack. */
4442 /* Is this a potential co-processor register candidate? */
4443 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4445 int slot = aapcs_select_call_coproc (pcum, mode, type);
4446 pcum->aapcs_cprc_slot = slot;
4448 /* We don't have to apply any of the rules from part B of the
4449 preparation phase; these are handled elsewhere in the
4454 /* A co-processor register candidate goes either in its own
4455 class of registers or on the stack. */
4456 if (!pcum->aapcs_cprc_failed[slot])
4458 /* C1.cp - Try to allocate the argument to co-processor
4460 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4463 /* C2.cp - Put the argument on the stack and note that we
4464 can't assign any more candidates in this slot. We also
4465 need to note that we have allocated stack space, so that
4466 we won't later try to split a non-cprc candidate between
4467 core registers and the stack. */
4468 pcum->aapcs_cprc_failed[slot] = true;
4469 pcum->can_split = false;
4472 /* We didn't get a register, so this argument goes on the
4474 gcc_assert (pcum->can_split == false);
4479 /* C3 - For double-word aligned arguments, round the NCRN up to the
4480 next even number. */
4481 ncrn = pcum->aapcs_ncrn;
4482 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4485 nregs = ARM_NUM_REGS2 (mode, type);
4487 /* Sigh, this test should really assert that nregs > 0, but a GCC
4488 extension allows empty structs and then gives them empty size; it
4489 then allows such a structure to be passed by value. For some of
4490 the code below we have to pretend that such an argument has
4491 non-zero size so that we 'locate' it correctly either in
4492 registers or on the stack. */
4493 gcc_assert (nregs >= 0);
4495 nregs2 = nregs ? nregs : 1;
4497 /* C4 - Argument fits entirely in core registers. */
4498 if (ncrn + nregs2 <= NUM_ARG_REGS)
4500 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4501 pcum->aapcs_next_ncrn = ncrn + nregs;
4505 /* C5 - Some core registers left and there are no arguments already
4506 on the stack: split this argument between the remaining core
4507 registers and the stack. */
4508 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4511 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4512 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4516 /* C6 - NCRN is set to 4. */
4517 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4519 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
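/* A minimal standalone sketch (disabled; not part of the compiler) of
   rules C3 and C4 above: an 8-byte, doubleword-aligned argument with
   NCRN == 1 has NCRN rounded up to 2 (C3) and then fits r2/r3 (C4);
   with NCRN == 3 it is rounded to 4 and falls through to the stack
   cases (C6-C8).  The C5 register/stack split is omitted here.  */
#if 0
#include <stdio.h>
static void layout (int ncrn)
{
  int nregs = 2;                        /* 8-byte argument */
  if (ncrn & 1)
    ncrn++;                             /* C3: round NCRN up to even */
  if (ncrn + nregs <= 4)                /* C4: fits in core registers */
    printf ("r%d/r%d\n", ncrn, ncrn + 1);
  else
    printf ("stack\n");                 /* C6-C8 */
}
int main (void)
{
  layout (1);   /* prints r2/r3 */
  layout (3);   /* prints stack */
  return 0;
}
#endif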
4523 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4524 for a call to a function whose data type is FNTYPE.
4525 For a library call, FNTYPE is NULL. */
4527 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4529 tree fndecl ATTRIBUTE_UNUSED)
4531 /* Long call handling. */
4533 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4535 pcum->pcs_variant = arm_pcs_default;
4537 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4539 if (arm_libcall_uses_aapcs_base (libname))
4540 pcum->pcs_variant = ARM_PCS_AAPCS;
4542 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4543 pcum->aapcs_reg = NULL_RTX;
4544 pcum->aapcs_partial = 0;
4545 pcum->aapcs_arg_processed = false;
4546 pcum->aapcs_cprc_slot = -1;
4547 pcum->can_split = true;
4549 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4553 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4555 pcum->aapcs_cprc_failed[i] = false;
4556 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4564 /* On the ARM, the offset starts at 0. */
4566 pcum->iwmmxt_nregs = 0;
4567 pcum->can_split = true;
4569 /* Varargs vectors are treated the same as long long.
4570 named_count avoids having to change the way ARM handles 'named'. */
4571 pcum->named_count = 0;
4574 if (TARGET_REALLY_IWMMXT && fntype)
4578 for (fn_arg = TYPE_ARG_TYPES (fntype);
4580 fn_arg = TREE_CHAIN (fn_arg))
4581 pcum->named_count += 1;
4583 if (! pcum->named_count)
4584 pcum->named_count = INT_MAX;
4589 /* Return true if mode/type need doubleword alignment. */
4591 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4593 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4594 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4598 /* Determine where to put an argument to a function.
4599 Value is zero to push the argument on the stack,
4600 or a hard register in which to store the argument.
4602 MODE is the argument's machine mode.
4603 TYPE is the data type of the argument (as a tree).
4604 This is null for libcalls where that information may
4606 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4607 the preceding args and about the function being called.
4608 NAMED is nonzero if this argument is a named parameter
4609 (otherwise it is an extra parameter matching an ellipsis).
4611 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4612 other arguments are passed on the stack. If (NAMED == 0) (which happens
4613 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4614 defined), say it is passed on the stack (function_prologue will
4615 indeed make it pass on the stack if necessary). */
4618 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4619 const_tree type, bool named)
4623 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4624 a call insn (op3 of a call_value insn). */
4625 if (mode == VOIDmode)
4628 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4630 aapcs_layout_arg (pcum, mode, type, named);
4631 return pcum->aapcs_reg;
4634 /* Varargs vectors are treated the same as long long.
4635 named_count avoids having to change the way ARM handles 'named'. */
4636 if (TARGET_IWMMXT_ABI
4637 && arm_vector_mode_supported_p (mode)
4638 && pcum->named_count > pcum->nargs + 1)
4640 if (pcum->iwmmxt_nregs <= 9)
4641 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4644 pcum->can_split = false;
4649 /* Put doubleword aligned quantities in even register pairs. */
4651 && ARM_DOUBLEWORD_ALIGN
4652 && arm_needs_doubleword_align (mode, type))
4655 /* Only allow splitting an arg between regs and memory if all preceding
4656 args were allocated to regs. For args passed by reference we only count
4657 the reference pointer. */
4658 if (pcum->can_split)
4661 nregs = ARM_NUM_REGS2 (mode, type);
4663 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4666 return gen_rtx_REG (mode, pcum->nregs);
4670 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4672 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4673 ? DOUBLEWORD_ALIGNMENT
4678 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4679 tree type, bool named)
4681 int nregs = pcum->nregs;
4683 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4685 aapcs_layout_arg (pcum, mode, type, named);
4686 return pcum->aapcs_partial;
4689 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4692 if (NUM_ARG_REGS > nregs
4693 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4695 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4700 /* Update the data in PCUM to advance over an argument
4701 of mode MODE and data type TYPE.
4702 (TYPE is null for libcalls where that information may not be available.) */
4705 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4706 const_tree type, bool named)
4708 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4710 aapcs_layout_arg (pcum, mode, type, named);
4712 if (pcum->aapcs_cprc_slot >= 0)
4714 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4716 pcum->aapcs_cprc_slot = -1;
4719 /* Generic stuff. */
4720 pcum->aapcs_arg_processed = false;
4721 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4722 pcum->aapcs_reg = NULL_RTX;
4723 pcum->aapcs_partial = 0;
4728 if (arm_vector_mode_supported_p (mode)
4729 && pcum->named_count > pcum->nargs
4730 && TARGET_IWMMXT_ABI)
4731 pcum->iwmmxt_nregs += 1;
4733 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4737 /* Variable-sized types are passed by reference. This is a GCC
4738 extension to the ARM ABI. */
4741 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4742 enum machine_mode mode ATTRIBUTE_UNUSED,
4743 const_tree type, bool named ATTRIBUTE_UNUSED)
4745 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4748 /* Encode the current state of the #pragma [no_]long_calls. */
4751 OFF, /* No #pragma [no_]long_calls is in effect. */
4752 LONG, /* #pragma long_calls is in effect. */
4753 SHORT /* #pragma no_long_calls is in effect. */
4756 static arm_pragma_enum arm_pragma_long_calls = OFF;
4759 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4761 arm_pragma_long_calls = LONG;
4765 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4767 arm_pragma_long_calls = SHORT;
4771 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4773 arm_pragma_long_calls = OFF;
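/* A usage sketch (disabled) of the pragmas handled above; functions
   declared while a pragma is in effect pick up the corresponding type
   attribute via arm_set_default_type_attributes below.  */
#if 0
#pragma long_calls
extern void far_func (void);    /* gets __attribute__ ((long_call)) */
#pragma no_long_calls
extern void near_func (void);   /* gets __attribute__ ((short_call)) */
#pragma long_calls_off
extern void plain_func (void);  /* neither attribute */
#endif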
4776 /* Handle an attribute requiring a FUNCTION_DECL;
4777 arguments as in struct attribute_spec.handler. */
4779 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4780 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4782 if (TREE_CODE (*node) != FUNCTION_DECL)
4784 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4786 *no_add_attrs = true;
4792 /* Handle an "interrupt" or "isr" attribute;
4793 arguments as in struct attribute_spec.handler. */
4795 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4800 if (TREE_CODE (*node) != FUNCTION_DECL)
4802 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4804 *no_add_attrs = true;
4806 /* FIXME: the argument if any is checked for type attributes;
4807 should it be checked for decl ones? */
4811 if (TREE_CODE (*node) == FUNCTION_TYPE
4812 || TREE_CODE (*node) == METHOD_TYPE)
4814 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4816 warning (OPT_Wattributes, "%qE attribute ignored",
4818 *no_add_attrs = true;
4821 else if (TREE_CODE (*node) == POINTER_TYPE
4822 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4823 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4824 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4826 *node = build_variant_type_copy (*node);
4827 TREE_TYPE (*node) = build_type_attribute_variant
4829 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4830 *no_add_attrs = true;
4834 /* Possibly pass this attribute on from the type to a decl. */
4835 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4836 | (int) ATTR_FLAG_FUNCTION_NEXT
4837 | (int) ATTR_FLAG_ARRAY_NEXT))
4839 *no_add_attrs = true;
4840 return tree_cons (name, args, NULL_TREE);
4844 warning (OPT_Wattributes, "%qE attribute ignored",
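/* A usage sketch (disabled) of the attribute handled above; "IRQ" and
   "FIQ" are among the argument strings arm_isr_value recognises.  */
#if 0
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
void fiq_handler (void) __attribute__ ((isr ("FIQ")));
#endif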
4853 /* Handle a "pcs" attribute; arguments as in struct
4854 attribute_spec.handler. */
4856 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4857 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4859 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4861 warning (OPT_Wattributes, "%qE attribute ignored", name);
4862 *no_add_attrs = true;
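/* A usage sketch (disabled) of the "pcs" attribute checked above;
   "aapcs" and "aapcs-vfp" are the variant names arm_pcs_from_attribute
   accepts.  */
#if 0
double dadd (double, double) __attribute__ ((pcs ("aapcs-vfp")));
int    iadd (int, int)       __attribute__ ((pcs ("aapcs")));
#endif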
4867 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4868 /* Handle the "notshared" attribute. This attribute is another way of
4869 requesting hidden visibility. ARM's compiler supports
4870 "__declspec(notshared)"; we support the same thing via an
4874 arm_handle_notshared_attribute (tree *node,
4875 tree name ATTRIBUTE_UNUSED,
4876 tree args ATTRIBUTE_UNUSED,
4877 int flags ATTRIBUTE_UNUSED,
4880 tree decl = TYPE_NAME (*node);
4884 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4885 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4886 *no_add_attrs = false;
4892 /* Return 0 if the attributes for two types are incompatible, 1 if they
4893 are compatible, and 2 if they are nearly compatible (which causes a
4894 warning to be generated). */
4896 arm_comp_type_attributes (const_tree type1, const_tree type2)
4900 /* Check for mismatch of non-default calling convention. */
4901 if (TREE_CODE (type1) != FUNCTION_TYPE)
4904 /* Check for mismatched call attributes. */
4905 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4906 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4907 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4908 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4910 /* Only bother to check if an attribute is defined. */
4911 if (l1 | l2 | s1 | s2)
4913 /* If one type has an attribute, the other must have the same attribute. */
4914 if ((l1 != l2) || (s1 != s2))
4917 /* Disallow mixed attributes. */
4918 if ((l1 & s2) || (l2 & s1))
4922 /* Check for mismatched ISR attribute. */
4923 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4925 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4926 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4928 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4935 /* Assigns default attributes to a newly defined type. This is used to
4936 set short_call/long_call attributes for function types of
4937 functions defined inside corresponding #pragma scopes. */
4939 arm_set_default_type_attributes (tree type)
4941 /* Add __attribute__ ((long_call)) to all functions when inside
4942 #pragma long_calls, or __attribute__ ((short_call)) when inside
4943 #pragma no_long_calls. */
4944 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4946 tree type_attr_list, attr_name;
4947 type_attr_list = TYPE_ATTRIBUTES (type);
4949 if (arm_pragma_long_calls == LONG)
4950 attr_name = get_identifier ("long_call");
4951 else if (arm_pragma_long_calls == SHORT)
4952 attr_name = get_identifier ("short_call");
4956 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4957 TYPE_ATTRIBUTES (type) = type_attr_list;
4961 /* Return true if DECL is known to be linked into section SECTION. */
4964 arm_function_in_section_p (tree decl, section *section)
4966 /* We can only be certain about functions defined in the same
4967 compilation unit. */
4968 if (!TREE_STATIC (decl))
4971 /* Make sure that SYMBOL always binds to the definition in this
4972 compilation unit. */
4973 if (!targetm.binds_local_p (decl))
4976 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4977 if (!DECL_SECTION_NAME (decl))
4979 /* Make sure that we will not create a unique section for DECL. */
4980 if (flag_function_sections || DECL_ONE_ONLY (decl))
4984 return function_section (decl) == section;
4987 /* Return nonzero if a 32-bit "long_call" should be generated for
4988 a call from the current function to DECL. We generate a long_call
4991 a. has an __attribute__ ((long_call))
4992 or b. is within the scope of a #pragma long_calls
4993 or c. the -mlong-calls command line switch has been specified
4995 However we do not generate a long call if the function:
4997 d. has an __attribute__ ((short_call))
4998 or e. is inside the scope of a #pragma no_long_calls
4999 or f. is defined in the same section as the current function. */
5002 arm_is_long_call_p (tree decl)
5007 return TARGET_LONG_CALLS;
5009 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5010 if (lookup_attribute ("short_call", attrs))
5013 /* For "f", be conservative, and only cater for cases in which the
5014 whole of the current function is placed in the same section. */
5015 if (!flag_reorder_blocks_and_partition
5016 && TREE_CODE (decl) == FUNCTION_DECL
5017 && arm_function_in_section_p (decl, current_function_section ()))
5020 if (lookup_attribute ("long_call", attrs))
5023 return TARGET_LONG_CALLS;
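/* A usage sketch (disabled) of rules a and d above: per-declaration
   attributes override both the pragma state and -mlong-calls.  */
#if 0
extern void remote_func (void) __attribute__ ((long_call));
extern void local_func (void)  __attribute__ ((short_call));
#endif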
5026 /* Return nonzero if it is ok to make a tail-call to DECL. */
5028 arm_function_ok_for_sibcall (tree decl, tree exp)
5030 unsigned long func_type;
5032 if (cfun->machine->sibcall_blocked)
5035 /* Never tailcall something for which we have no decl, or if we
5036 are generating code for Thumb-1. */
5037 if (decl == NULL || TARGET_THUMB1)
5040 /* The PIC register is live on entry to VxWorks PLT entries, so we
5041 must make the call before restoring the PIC register. */
5042 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5045 /* Cannot tail-call to long calls, since these are out of range of
5046 a branch instruction. */
5047 if (arm_is_long_call_p (decl))
5050 /* If we are interworking and the function is not declared static
5051 then we can't tail-call it unless we know that it exists in this
5052 compilation unit (since it might be a Thumb routine). */
5053 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5056 func_type = arm_current_func_type ();
5057 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5058 if (IS_INTERRUPT (func_type))
5061 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5063 /* Check that the return value locations are the same. For
5064 example that we aren't returning a value from the sibling in
5065 a VFP register but then need to transfer it to a core
5069 a = arm_function_value (TREE_TYPE (exp), decl, false);
5070 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5072 if (!rtx_equal_p (a, b))
5076 /* Never tailcall if function may be called with a misaligned SP. */
5077 if (IS_STACKALIGN (func_type))
5080 /* Everything else is ok. */
5085 /* Addressing mode support functions. */
5087 /* Return nonzero if X is a legitimate immediate operand when compiling
5088 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5090 legitimate_pic_operand_p (rtx x)
5092 if (GET_CODE (x) == SYMBOL_REF
5093 || (GET_CODE (x) == CONST
5094 && GET_CODE (XEXP (x, 0)) == PLUS
5095 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5101 /* Record that the current function needs a PIC register. Initialize
5102 cfun->machine->pic_reg if we have not already done so. */
5105 require_pic_register (void)
5107 /* A lot of the logic here is made obscure by the fact that this
5108 routine gets called as part of the rtx cost estimation process.
5109 We don't want those calls to affect any assumptions about the real
5110 function; and further, we can't call entry_of_function() until we
5111 start the real expansion process. */
5112 if (!crtl->uses_pic_offset_table)
5114 gcc_assert (can_create_pseudo_p ());
5115 if (arm_pic_register != INVALID_REGNUM)
5117 if (!cfun->machine->pic_reg)
5118 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5120 /* Play games to avoid marking the function as needing pic
5121 if we are being called as part of the cost-estimation
5123 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5124 crtl->uses_pic_offset_table = 1;
5130 if (!cfun->machine->pic_reg)
5131 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5133 /* Play games to avoid marking the function as needing pic
5134 if we are being called as part of the cost-estimation
5136 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5138 crtl->uses_pic_offset_table = 1;
5141 arm_load_pic_register (0UL);
5146 for (insn = seq; insn; insn = NEXT_INSN (insn))
5148 INSN_LOCATOR (insn) = prologue_locator;
5150 /* We can be called during expansion of PHI nodes, where
5151 we can't yet emit instructions directly in the final
5152 insn stream. Queue the insns on the entry edge, they will
5153 be committed after everything else is expanded. */
5154 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5161 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5163 if (GET_CODE (orig) == SYMBOL_REF
5164 || GET_CODE (orig) == LABEL_REF)
5170 gcc_assert (can_create_pseudo_p ());
5171 reg = gen_reg_rtx (Pmode);
5174 /* VxWorks does not impose a fixed gap between segments; the run-time
5175 gap can be different from the object-file gap. We therefore can't
5176 use GOTOFF unless we are absolutely sure that the symbol is in the
5177 same segment as the GOT. Unfortunately, the flexibility of linker
5178 scripts means that we can't be sure of that in general, so assume
5179 that GOTOFF is never valid on VxWorks. */
5180 if ((GET_CODE (orig) == LABEL_REF
5181 || (GET_CODE (orig) == SYMBOL_REF
5182 && SYMBOL_REF_LOCAL_P (orig)))
5184 && !TARGET_VXWORKS_RTP)
5185 insn = arm_pic_static_addr (orig, reg);
5191 /* If this function doesn't have a pic register, create one now. */
5192 require_pic_register ();
5194 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5196 /* Make the MEM as close to a constant as possible. */
5197 mem = SET_SRC (pat);
5198 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5199 MEM_READONLY_P (mem) = 1;
5200 MEM_NOTRAP_P (mem) = 1;
5202 insn = emit_insn (pat);
5205 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5207 set_unique_reg_note (insn, REG_EQUAL, orig);
5211 else if (GET_CODE (orig) == CONST)
5215 if (GET_CODE (XEXP (orig, 0)) == PLUS
5216 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5219 /* Handle the case where we have: const (UNSPEC_TLS). */
5220 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5221 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5224 /* Handle the case where we have:
5225 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5227 if (GET_CODE (XEXP (orig, 0)) == PLUS
5228 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5229 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5231 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5237 gcc_assert (can_create_pseudo_p ());
5238 reg = gen_reg_rtx (Pmode);
5241 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5243 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5244 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5245 base == reg ? 0 : reg);
5247 if (GET_CODE (offset) == CONST_INT)
5249 /* The base register doesn't really matter; we only want to
5250 test the index for the appropriate mode. */
5251 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5253 gcc_assert (can_create_pseudo_p ());
5254 offset = force_reg (Pmode, offset);
5257 if (GET_CODE (offset) == CONST_INT)
5258 return plus_constant (base, INTVAL (offset));
5261 if (GET_MODE_SIZE (mode) > 4
5262 && (GET_MODE_CLASS (mode) == MODE_INT
5263 || TARGET_SOFT_FLOAT))
5265 emit_insn (gen_addsi3 (reg, base, offset));
5269 return gen_rtx_PLUS (Pmode, base, offset);
5276 /* Find a spare register to use during the prologue of a function. */
5279 thumb_find_work_register (unsigned long pushed_regs_mask)
5283 /* Check the argument registers first as these are call-used. The
5284 register allocation order means that sometimes r3 might be used
5285 but earlier argument registers might not, so check them all. */
5286 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5287 if (!df_regs_ever_live_p (reg))
5290 /* Before going on to check the call-saved registers we can try a couple
5291 more ways of deducing that r3 is available. The first is when we are
5292 pushing anonymous arguments onto the stack and we have less than 4
5293 registers worth of fixed arguments (*). In this case r3 will be part of
5294 the variable argument list and so we can be sure that it will be
5295 pushed right at the start of the function. Hence it will be available
5296 for the rest of the prologue.
5297 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5298 if (cfun->machine->uses_anonymous_args
5299 && crtl->args.pretend_args_size > 0)
5300 return LAST_ARG_REGNUM;
5302 /* The other case is when we have fixed arguments but less than 4 registers
5303 worth. In this case r3 might be used in the body of the function, but
5304 it is not being used to convey an argument into the function. In theory
5305 we could just check crtl->args.size to see how many bytes are
5306 being passed in argument registers, but that value seems to be unreliable.
5307 Sometimes it will have the value 0 when in fact arguments are being
5308 passed. (See testcase execute/20021111-1.c for an example). So we also
5309 check the args_info.nregs field as well. The problem with this field is
5310 that it makes no allowances for arguments that are passed to the
5311 function but which are not used. Hence we could miss an opportunity
5312 when a function has an unused argument in r3. But it is better to be
5313 safe than to be sorry. */
5314 if (! cfun->machine->uses_anonymous_args
5315 && crtl->args.size >= 0
5316 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5317 && crtl->args.info.nregs < 4)
5318 return LAST_ARG_REGNUM;
5320 /* Otherwise look for a call-saved register that is going to be pushed. */
5321 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5322 if (pushed_regs_mask & (1 << reg))
5327 /* Thumb-2 can use high regs. */
5328 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5329 if (pushed_regs_mask & (1 << reg))
5332 /* Something went wrong - thumb_compute_save_reg_mask()
5333 should have arranged for a suitable register to be pushed. */
5337 static GTY(()) int pic_labelno;
5339 /* Generate code to load the PIC register. In Thumb mode SCRATCH is a
5343 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5345 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5347 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5350 gcc_assert (flag_pic);
5352 pic_reg = cfun->machine->pic_reg;
5353 if (TARGET_VXWORKS_RTP)
5355 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5356 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5357 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5359 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5361 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5362 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5366 /* We use an UNSPEC rather than a LABEL_REF because this label
5367 never appears in the code stream. */
5369 labelno = GEN_INT (pic_labelno++);
5370 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5371 l1 = gen_rtx_CONST (VOIDmode, l1);
5373 /* On the ARM the PC register contains 'dot + 8' at the time of the
5374 addition, on the Thumb it is 'dot + 4'. */
5375 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5376 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5378 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5382 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5384 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5386 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5388 else /* TARGET_THUMB1 */
5390 if (arm_pic_register != INVALID_REGNUM
5391 && REGNO (pic_reg) > LAST_LO_REGNUM)
5393 /* We will have pushed the pic register, so we should always be
5394 able to find a work register. */
5395 pic_tmp = gen_rtx_REG (SImode,
5396 thumb_find_work_register (saved_regs));
5397 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5398 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5401 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5402 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5406 /* Need to emit this whether or not we obey regdecls,
5407 since setjmp/longjmp can cause life info to screw up. */
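/* A minimal standalone sketch (disabled; addresses made up) of the
   'dot + 8' bias described above: the stored constant is computed as
   target - (label + bias), so the add at LABEL reproduces TARGET; the
   bias is 8 in ARM state and 4 in Thumb state.  */
#if 0
#include <stdio.h>
int main (void)
{
  unsigned long target = 0x10800, label = 0x8040;
  int bias = 8;                                   /* ARM state */
  unsigned long stored = target - (label + bias);
  printf ("%#lx\n", (label + bias) + stored);     /* prints 0x10800 */
  return 0;
}
#endif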
5411 /* Generate code to load the address of a static var when flag_pic is set. */
5413 arm_pic_static_addr (rtx orig, rtx reg)
5415 rtx l1, labelno, offset_rtx, insn;
5417 gcc_assert (flag_pic);
5419 /* We use an UNSPEC rather than a LABEL_REF because this label
5420 never appears in the code stream. */
5421 labelno = GEN_INT (pic_labelno++);
5422 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5423 l1 = gen_rtx_CONST (VOIDmode, l1);
5425 /* On the ARM the PC register contains 'dot + 8' at the time of the
5426 addition, on the Thumb it is 'dot + 4'. */
5427 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5428 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5429 UNSPEC_SYMBOL_OFFSET);
5430 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5434 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5436 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5438 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5440 else /* TARGET_THUMB1 */
5442 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5443 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5449 /* Return nonzero if X is valid as an ARM state addressing register. */
5451 arm_address_register_rtx_p (rtx x, int strict_p)
5455 if (GET_CODE (x) != REG)
5461 return ARM_REGNO_OK_FOR_BASE_P (regno);
5463 return (regno <= LAST_ARM_REGNUM
5464 || regno >= FIRST_PSEUDO_REGISTER
5465 || regno == FRAME_POINTER_REGNUM
5466 || regno == ARG_POINTER_REGNUM);
5469 /* Return TRUE if this rtx is the difference of a symbol and a label,
5470 and will reduce to a PC-relative relocation in the object file.
5471 Expressions like this can be left alone when generating PIC, rather
5472 than forced through the GOT. */
5474 pcrel_constant_p (rtx x)
5476 if (GET_CODE (x) == MINUS)
5477 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5482 /* Return true if X will surely end up in an index register after next
5485 will_be_in_index_register (const_rtx x)
5487 /* arm.md: calculate_pic_address will split this into a register. */
5488 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5491 /* Return nonzero if X is a valid ARM state address operand. */
5493 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5497 enum rtx_code code = GET_CODE (x);
5499 if (arm_address_register_rtx_p (x, strict_p))
5502 use_ldrd = (TARGET_LDRD
5504 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5506 if (code == POST_INC || code == PRE_DEC
5507 || ((code == PRE_INC || code == POST_DEC)
5508 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5509 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5511 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5512 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5513 && GET_CODE (XEXP (x, 1)) == PLUS
5514 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5516 rtx addend = XEXP (XEXP (x, 1), 1);
5518 /* Don't allow ldrd post increment by register because it's hard
5519 to fix up invalid register choices. */
5521 && GET_CODE (x) == POST_MODIFY
5522 && GET_CODE (addend) == REG)
5525 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5526 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5529 /* After reload, constants split into minipools will have addresses
5530 from a LABEL_REF. */
5531 else if (reload_completed
5532 && (code == LABEL_REF
5534 && GET_CODE (XEXP (x, 0)) == PLUS
5535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5536 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5539 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5542 else if (code == PLUS)
5544 rtx xop0 = XEXP (x, 0);
5545 rtx xop1 = XEXP (x, 1);
5547 return ((arm_address_register_rtx_p (xop0, strict_p)
5548 && ((GET_CODE (xop1) == CONST_INT
5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5550 || (!strict_p && will_be_in_index_register (xop1))))
5551 || (arm_address_register_rtx_p (xop1, strict_p)
5552 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5556 /* Reload currently can't handle MINUS, so disable this for now */
5557 else if (GET_CODE (x) == MINUS)
5559 rtx xop0 = XEXP (x, 0);
5560 rtx xop1 = XEXP (x, 1);
5562 return (arm_address_register_rtx_p (xop0, strict_p)
5563 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5567 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5568 && code == SYMBOL_REF
5569 && CONSTANT_POOL_ADDRESS_P (x)
5571 && symbol_mentioned_p (get_pool_constant (x))
5572 && ! pcrel_constant_p (get_pool_constant (x))))
5578 /* Return nonzero if X is a valid Thumb-2 address operand. */
5580 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5583 enum rtx_code code = GET_CODE (x);
5585 if (arm_address_register_rtx_p (x, strict_p))
5588 use_ldrd = (TARGET_LDRD
5590 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5592 if (code == POST_INC || code == PRE_DEC
5593 || ((code == PRE_INC || code == POST_DEC)
5594 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5595 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5597 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5598 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5599 && GET_CODE (XEXP (x, 1)) == PLUS
5600 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5602 /* Thumb-2 only has autoincrement by constant. */
5603 rtx addend = XEXP (XEXP (x, 1), 1);
5604 HOST_WIDE_INT offset;
5606 if (GET_CODE (addend) != CONST_INT)
5609 offset = INTVAL (addend);
5610 if (GET_MODE_SIZE (mode) <= 4)
5611 return (offset > -256 && offset < 256);
5613 return (use_ldrd && offset > -1024 && offset < 1024
5614 && (offset & 3) == 0);
5617 /* After reload, constants split into minipools will have addresses
5618 from a LABEL_REF. */
5619 else if (reload_completed
5620 && (code == LABEL_REF
5622 && GET_CODE (XEXP (x, 0)) == PLUS
5623 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5624 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5627 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5630 else if (code == PLUS)
5632 rtx xop0 = XEXP (x, 0);
5633 rtx xop1 = XEXP (x, 1);
5635 return ((arm_address_register_rtx_p (xop0, strict_p)
5636 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5637 || (!strict_p && will_be_in_index_register (xop1))))
5638 || (arm_address_register_rtx_p (xop1, strict_p)
5639 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5642 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5643 && code == SYMBOL_REF
5644 && CONSTANT_POOL_ADDRESS_P (x)
5646 && symbol_mentioned_p (get_pool_constant (x))
5647 && ! pcrel_constant_p (get_pool_constant (x))))
5653 /* Return nonzero if INDEX is valid for an address index operand in
5656 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5659 HOST_WIDE_INT range;
5660 enum rtx_code code = GET_CODE (index);
5662 /* Standard coprocessor addressing modes. */
5663 if (TARGET_HARD_FLOAT
5664 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5665 && (mode == SFmode || mode == DFmode
5666 || (TARGET_MAVERICK && mode == DImode)))
5667 return (code == CONST_INT && INTVAL (index) < 1024
5668 && INTVAL (index) > -1024
5669 && (INTVAL (index) & 3) == 0);
5671 /* For quad modes, we restrict the constant offset to be slightly less
5672 than what the instruction format permits. We do this because for
5673 quad mode moves, we will actually decompose them into two separate
5674 double-mode reads or writes. INDEX must therefore be a valid
5675 (double-mode) offset and so should INDEX+8. */
5676 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1016
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 /* We have no such constraint on double mode offsets, so we permit the
5683 full range of the instruction format. */
5684 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5685 return (code == CONST_INT
5686 && INTVAL (index) < 1024
5687 && INTVAL (index) > -1024
5688 && (INTVAL (index) & 3) == 0);
5690 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5691 return (code == CONST_INT
5692 && INTVAL (index) < 1024
5693 && INTVAL (index) > -1024
5694 && (INTVAL (index) & 3) == 0);
5696 if (arm_address_register_rtx_p (index, strict_p)
5697 && (GET_MODE_SIZE (mode) <= 4))
5700 if (mode == DImode || mode == DFmode)
5702 if (code == CONST_INT)
5704 HOST_WIDE_INT val = INTVAL (index);
5707 return val > -256 && val < 256;
5709 return val > -4096 && val < 4092;
5712 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5715 if (GET_MODE_SIZE (mode) <= 4
5719 || (mode == QImode && outer == SIGN_EXTEND))))
5723 rtx xiop0 = XEXP (index, 0);
5724 rtx xiop1 = XEXP (index, 1);
5726 return ((arm_address_register_rtx_p (xiop0, strict_p)
5727 && power_of_two_operand (xiop1, SImode))
5728 || (arm_address_register_rtx_p (xiop1, strict_p)
5729 && power_of_two_operand (xiop0, SImode)));
5731 else if (code == LSHIFTRT || code == ASHIFTRT
5732 || code == ASHIFT || code == ROTATERT)
5734 rtx op = XEXP (index, 1);
5736 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5737 && GET_CODE (op) == CONST_INT
5739 && INTVAL (op) <= 31);
5743 /* For ARM v4 we may be doing a sign-extend operation during the
5749 || (outer == SIGN_EXTEND && mode == QImode))
5755 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5757 return (code == CONST_INT
5758 && INTVAL (index) < range
5759 && INTVAL (index) > -range);
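/* A minimal standalone sketch (disabled; not part of the compiler) of
   the NEON quad-mode offset test above: the limit stops 8 short of the
   double-mode range so the second D register of the pair (at offset
   INDEX + 8) stays addressable.  */
#if 0
#include <stdio.h>
static int neon_qreg_offset_ok (long imm)
{
  return imm < 1016 && imm > -1024 && (imm & 3) == 0;
}
int main (void)
{
  printf ("%d %d %d\n",
          neon_qreg_offset_ok (1012),   /* 1: 1012 and 1012+8 both fit */
          neon_qreg_offset_ok (1016),   /* 0: second half would need 1024 */
          neon_qreg_offset_ok (10));    /* 0: not a multiple of 4 */
  return 0;
}
#endif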
5762 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5763 index operand, i.e. 1, 2, 4 or 8. */
5765 thumb2_index_mul_operand (rtx op)
5769 if (GET_CODE (op) != CONST_INT)
5773 return (val == 1 || val == 2 || val == 4 || val == 8);
5776 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5778 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5780 enum rtx_code code = GET_CODE (index);
5782 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5783 /* Standard coprocessor addressing modes. */
5784 if (TARGET_HARD_FLOAT
5785 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5786 && (mode == SFmode || mode == DFmode
5787 || (TARGET_MAVERICK && mode == DImode)))
5788 return (code == CONST_INT && INTVAL (index) < 1024
5789 && INTVAL (index) > -1024
5790 && (INTVAL (index) & 3) == 0);
5792 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5794 /* For DImode assume values will usually live in core regs
5795 and only allow LDRD addressing modes. */
5796 if (!TARGET_LDRD || mode != DImode)
5797 return (code == CONST_INT
5798 && INTVAL (index) < 1024
5799 && INTVAL (index) > -1024
5800 && (INTVAL (index) & 3) == 0);
5803 /* For quad modes, we restrict the constant offset to be slightly less
5804 than what the instruction format permits. We do this because for
5805 quad mode moves, we will actually decompose them into two separate
5806 double-mode reads or writes. INDEX must therefore be a valid
5807 (double-mode) offset and so should INDEX+8. */
5808 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5809 return (code == CONST_INT
5810 && INTVAL (index) < 1016
5811 && INTVAL (index) > -1024
5812 && (INTVAL (index) & 3) == 0);
5814 /* We have no such constraint on double mode offsets, so we permit the
5815 full range of the instruction format. */
5816 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5817 return (code == CONST_INT
5818 && INTVAL (index) < 1024
5819 && INTVAL (index) > -1024
5820 && (INTVAL (index) & 3) == 0);
5822 if (arm_address_register_rtx_p (index, strict_p)
5823 && (GET_MODE_SIZE (mode) <= 4))
5826 if (mode == DImode || mode == DFmode)
5828 if (code == CONST_INT)
5830 HOST_WIDE_INT val = INTVAL (index);
5831 /* ??? Can we assume ldrd for thumb2? */
5832 /* Thumb-2 ldrd only has reg+const addressing modes. */
5833 /* ldrd supports offsets of +-1020.
5834 However, the ldr fallback does not. */
5835 return val > -256 && val < 256 && (val & 3) == 0;
5843 rtx xiop0 = XEXP (index, 0);
5844 rtx xiop1 = XEXP (index, 1);
5846 return ((arm_address_register_rtx_p (xiop0, strict_p)
5847 && thumb2_index_mul_operand (xiop1))
5848 || (arm_address_register_rtx_p (xiop1, strict_p)
5849 && thumb2_index_mul_operand (xiop0)));
5851 else if (code == ASHIFT)
5853 rtx op = XEXP (index, 1);
5855 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5856 && GET_CODE (op) == CONST_INT
5858 && INTVAL (op) <= 3);
5861 return (code == CONST_INT
5862 && INTVAL (index) < 4096
5863 && INTVAL (index) > -256);
5866 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5868 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5872 if (GET_CODE (x) != REG)
5878 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5880 return (regno <= LAST_LO_REGNUM
5881 || regno > LAST_VIRTUAL_REGISTER
5882 || regno == FRAME_POINTER_REGNUM
5883 || (GET_MODE_SIZE (mode) >= 4
5884 && (regno == STACK_POINTER_REGNUM
5885 || regno >= FIRST_PSEUDO_REGISTER
5886 || x == hard_frame_pointer_rtx
5887 || x == arg_pointer_rtx)));
5890 /* Return nonzero if x is a legitimate index register. This is the case
5891 for any base register that can access a QImode object. */
5893 thumb1_index_register_rtx_p (rtx x, int strict_p)
5895 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5898 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5900 The AP may be eliminated to either the SP or the FP, so we use the
5901 least common denominator, e.g. SImode, and offsets from 0 to 64.
5903 ??? Verify whether the above is the right approach.
5905 ??? Also, the FP may be eliminated to the SP, so perhaps that
5906 needs special handling also.
5908 ??? Look at how the mips16 port solves this problem. It probably uses
5909 better ways to solve some of these problems.
5911 Although it is not incorrect, we don't accept QImode and HImode
5912 addresses based on the frame pointer or arg pointer until the
5913 reload pass starts. This is so that eliminating such addresses
5914 into stack based ones won't produce impossible code. */
5916 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5918 /* ??? Not clear if this is right. Experiment. */
5919 if (GET_MODE_SIZE (mode) < 4
5920 && !(reload_in_progress || reload_completed)
5921 && (reg_mentioned_p (frame_pointer_rtx, x)
5922 || reg_mentioned_p (arg_pointer_rtx, x)
5923 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5924 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5925 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5926 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5929 /* Accept any base register. SP only in SImode or larger. */
5930 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5933 /* This is PC relative data before arm_reorg runs. */
5934 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5935 && GET_CODE (x) == SYMBOL_REF
5936 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5939 /* This is PC relative data after arm_reorg runs. */
5940 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5942 && (GET_CODE (x) == LABEL_REF
5943 || (GET_CODE (x) == CONST
5944 && GET_CODE (XEXP (x, 0)) == PLUS
5945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5949 /* Post-inc indexing only supported for SImode and larger. */
5950 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5951 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5954 else if (GET_CODE (x) == PLUS)
5956 /* REG+REG address can be any two index registers. */
5957 /* We disallow FRAME+REG addressing since we know that FRAME
5958 will be replaced with STACK, and SP relative addressing only
5959 permits SP+OFFSET. */
5960 if (GET_MODE_SIZE (mode) <= 4
5961 && XEXP (x, 0) != frame_pointer_rtx
5962 && XEXP (x, 1) != frame_pointer_rtx
5963 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5964 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5965 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5968 /* REG+const has a 5-7 bit offset for non-SP registers. */
5969 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5970 || XEXP (x, 0) == arg_pointer_rtx)
5971 && GET_CODE (XEXP (x, 1)) == CONST_INT
5972 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5975 /* REG+const has a 10-bit offset for SP, but only SImode and
5976 larger are supported. */
5977 /* ??? Should probably check for DI/DFmode overflow here
5978 just like GO_IF_LEGITIMATE_OFFSET does. */
5979 else if (GET_CODE (XEXP (x, 0)) == REG
5980 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5981 && GET_MODE_SIZE (mode) >= 4
5982 && GET_CODE (XEXP (x, 1)) == CONST_INT
5983 && INTVAL (XEXP (x, 1)) >= 0
5984 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5985 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5988 else if (GET_CODE (XEXP (x, 0)) == REG
5989 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5990 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5991 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5992 && REGNO (XEXP (x, 0))
5993 <= LAST_VIRTUAL_POINTER_REGISTER))
5994 && GET_MODE_SIZE (mode) >= 4
5995 && GET_CODE (XEXP (x, 1)) == CONST_INT
5996 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6000 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6001 && GET_MODE_SIZE (mode) == 4
6002 && GET_CODE (x) == SYMBOL_REF
6003 && CONSTANT_POOL_ADDRESS_P (x)
6005 && symbol_mentioned_p (get_pool_constant (x))
6006 && ! pcrel_constant_p (get_pool_constant (x))))
6012 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6013 instruction of mode MODE. */
6015 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6017 switch (GET_MODE_SIZE (mode))
6020 return val >= 0 && val < 32;
6023 return val >= 0 && val < 64 && (val & 1) == 0;
6027 && (val + GET_MODE_SIZE (mode)) <= 128
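/* A minimal standalone sketch (disabled) of the scaled offset ranges
   above: a 5-bit immediate scaled by the access size gives 0-31 for
   bytes, 0-62 (even) for halfwords and 0-124 for words; the word case
   is assumed to require 4-byte alignment, as the elided tail of the
   default case implies.  */
#if 0
#include <stdio.h>
static int thumb_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1: return val >= 0 && val < 32;
    case 2: return val >= 0 && val < 64 && (val & 1) == 0;
    default: return val >= 0 && val + size <= 128 && (val & 3) == 0;
    }
}
int main (void)
{
  printf ("%d %d %d\n",
          thumb_offset_ok (1, 31),    /* 1 */
          thumb_offset_ok (2, 63),    /* 0: odd */
          thumb_offset_ok (4, 124));  /* 1: 124 + 4 <= 128 */
  return 0;
}
#endif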
6033 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6036 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6037 else if (TARGET_THUMB2)
6038 return thumb2_legitimate_address_p (mode, x, strict_p);
6039 else /* if (TARGET_THUMB1) */
6040 return thumb1_legitimate_address_p (mode, x, strict_p);
6043 /* Build the SYMBOL_REF for __tls_get_addr. */
6045 static GTY(()) rtx tls_get_addr_libfunc;
6048 get_tls_get_addr (void)
6050 if (!tls_get_addr_libfunc)
6051 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6052 return tls_get_addr_libfunc;
6056 arm_load_tp (rtx target)
6059 target = gen_reg_rtx (SImode);
6063 /* Can return in any reg. */
6064 emit_insn (gen_load_tp_hard (target));
6068 /* Always returned in r0. Immediately copy the result into a pseudo;
6069 otherwise other uses of r0 (e.g. setting up function arguments) may
6070 clobber the value. */
6074 emit_insn (gen_load_tp_soft ());
6076 tmp = gen_rtx_REG (SImode, 0);
6077 emit_move_insn (target, tmp);
6083 load_tls_operand (rtx x, rtx reg)
6087 if (reg == NULL_RTX)
6088 reg = gen_reg_rtx (SImode);
6090 tmp = gen_rtx_CONST (SImode, x);
6092 emit_move_insn (reg, tmp);
6098 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6100 rtx insns, label, labelno, sum;
6104 labelno = GEN_INT (pic_labelno++);
6105 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6106 label = gen_rtx_CONST (VOIDmode, label);
6108 sum = gen_rtx_UNSPEC (Pmode,
6109 gen_rtvec (4, x, GEN_INT (reloc), label,
6110 GEN_INT (TARGET_ARM ? 8 : 4)),
6112 reg = load_tls_operand (sum, reg);
6115 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6116 else if (TARGET_THUMB2)
6117 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6118 else /* TARGET_THUMB1 */
6119 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6121 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6122 Pmode, 1, reg, Pmode);
6124 insns = get_insns ();
6131 legitimize_tls_address (rtx x, rtx reg)
6133 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6134 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6138 case TLS_MODEL_GLOBAL_DYNAMIC:
6139 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6140 dest = gen_reg_rtx (Pmode);
6141 emit_libcall_block (insns, dest, ret, x);
6144 case TLS_MODEL_LOCAL_DYNAMIC:
6145 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6147 /* Attach a unique REG_EQUIV to allow the RTL optimizers to
6148 share the LDM result with other LD model accesses. */
6149 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6151 dest = gen_reg_rtx (Pmode);
6152 emit_libcall_block (insns, dest, ret, eqv);
6154 /* Load the addend. */
6155 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6157 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6158 return gen_rtx_PLUS (Pmode, dest, addend);
6160 case TLS_MODEL_INITIAL_EXEC:
6161 labelno = GEN_INT (pic_labelno++);
6162 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6163 label = gen_rtx_CONST (VOIDmode, label);
6164 sum = gen_rtx_UNSPEC (Pmode,
6165 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6166 GEN_INT (TARGET_ARM ? 8 : 4)),
6168 reg = load_tls_operand (sum, reg);
6171 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6172 else if (TARGET_THUMB2)
6173 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6176 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6177 emit_move_insn (reg, gen_const_mem (SImode, reg));
6180 tp = arm_load_tp (NULL_RTX);
6182 return gen_rtx_PLUS (Pmode, tp, reg);
6184 case TLS_MODEL_LOCAL_EXEC:
6185 tp = arm_load_tp (NULL_RTX);
6187 reg = gen_rtx_UNSPEC (Pmode,
6188 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6190 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6192 return gen_rtx_PLUS (Pmode, tp, reg);
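/* A usage sketch (disabled); which case above a SYMBOL_REF takes is
   recorded by SYMBOL_REF_TLS_MODEL, and the pairings below are only the
   typical outcomes, depending on -fpic and symbol visibility.  */
#if 0
static __thread int t_static;  /* usually local-exec in an executable,
                                  local-dynamic with -fpic */
extern __thread int t_extern;  /* usually initial-exec in an executable,
                                  global-dynamic with -fpic */
#endif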
6199 /* Try machine-dependent ways of modifying an illegitimate address
6200 to be legitimate. If we find one, return the new, valid address. */
6202 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6206 /* TODO: legitimize_address for Thumb2. */
6209 return thumb_legitimize_address (x, orig_x, mode);
6212 if (arm_tls_symbol_p (x))
6213 return legitimize_tls_address (x, NULL_RTX);
6215 if (GET_CODE (x) == PLUS)
6217 rtx xop0 = XEXP (x, 0);
6218 rtx xop1 = XEXP (x, 1);
6220 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6221 xop0 = force_reg (SImode, xop0);
6223 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6224 xop1 = force_reg (SImode, xop1);
6226 if (ARM_BASE_REGISTER_RTX_P (xop0)
6227 && GET_CODE (xop1) == CONST_INT)
6229 HOST_WIDE_INT n, low_n;
6233 /* VFP addressing modes actually allow greater offsets, but for
6234 now we just stick with the lowest common denominator. */
6236 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6248 low_n = ((mode) == TImode ? 0
6249 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6253 base_reg = gen_reg_rtx (SImode);
6254 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6255 emit_move_insn (base_reg, val);
6256 x = plus_constant (base_reg, low_n);
6258 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6259 x = gen_rtx_PLUS (SImode, xop0, xop1);
6262 /* XXX We don't allow MINUS any more -- see comment in
6263 arm_legitimate_address_outer_p (). */
6264 else if (GET_CODE (x) == MINUS)
6266 rtx xop0 = XEXP (x, 0);
6267 rtx xop1 = XEXP (x, 1);
6269 if (CONSTANT_P (xop0))
6270 xop0 = force_reg (SImode, xop0);
6272 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6273 xop1 = force_reg (SImode, xop1);
6275 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6276 x = gen_rtx_MINUS (SImode, xop0, xop1);
6279 /* Make sure to take full advantage of the pre-indexed addressing mode
6280 with absolute addresses which often allows for the base register to
6281 be factorized for multiple adjacent memory references, and it might
6282 even allow for the mini pool to be avoided entirely. */
6283 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6286 HOST_WIDE_INT mask, base, index;
6289 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6290 use an 8-bit index. So let's use a 12-bit index for SImode only and
6291 hope that arm_gen_constant will enable ldrb to use more bits. */
6292 bits = (mode == SImode) ? 12 : 8;
6293 mask = (1 << bits) - 1;
6294 base = INTVAL (x) & ~mask;
6295 index = INTVAL (x) & mask;
6296 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6298 /* It'll most probably be more efficient to generate the base
6299 with more bits set and use a negative index instead. */
6303 base_reg = force_reg (SImode, GEN_INT (base));
6304 x = plus_constant (base_reg, index);
6309 /* We need to find and carefully transform any SYMBOL and LABEL
6310 references; so go back to the original address expression. */
6311 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6313 if (new_x != orig_x)
6321 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6322 to be legitimate. If we find one, return the new, valid address. */
6324 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6326 if (arm_tls_symbol_p (x))
6327 return legitimize_tls_address (x, NULL_RTX);
6329 if (GET_CODE (x) == PLUS
6330 && GET_CODE (XEXP (x, 1)) == CONST_INT
6331 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6332 || INTVAL (XEXP (x, 1)) < 0))
6334 rtx xop0 = XEXP (x, 0);
6335 rtx xop1 = XEXP (x, 1);
6336 HOST_WIDE_INT offset = INTVAL (xop1);
6338 /* Try and fold the offset into a biasing of the base register and
6339 then offsetting that. Don't do this when optimizing for space
6340 since it can cause too many CSEs. */
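/* Illustrative example (numbers invented): for an SImode access at
   base + 260, the code below chooses delta = 8 and rewrites the
   address as (base + 252) + 8, so that 252 fits an 8-bit register
   bias and 8 fits the scaled load/store offset field. */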
6341 if (optimize_size && offset >= 0
6342 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6344 HOST_WIDE_INT delta;
6347 delta = offset - (256 - GET_MODE_SIZE (mode));
6348 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6349 delta = 31 * GET_MODE_SIZE (mode);
6351 delta = offset & (~31 * GET_MODE_SIZE (mode));
6353 xop0 = force_operand (plus_constant (xop0, offset - delta),
6355 x = plus_constant (xop0, delta);
6357 else if (offset < 0 && offset > -256)
6358 /* Small negative offsets are best done with a subtract before the
6359 dereference; forcing these into a register normally takes two
6360 instructions. */
6361 x = force_operand (x, NULL_RTX);
6364 /* For the remaining cases, force the constant into a register. */
6365 xop1 = force_reg (SImode, xop1);
6366 x = gen_rtx_PLUS (SImode, xop0, xop1);
6369 else if (GET_CODE (x) == PLUS
6370 && s_register_operand (XEXP (x, 1), SImode)
6371 && !s_register_operand (XEXP (x, 0), SImode))
6373 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6375 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6380 /* We need to find and carefully transform any SYMBOL and LABEL
6381 references; so go back to the original address expression. */
6382 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6384 if (new_x != orig_x)
6392 thumb_legitimize_reload_address (rtx *x_p,
6393 enum machine_mode mode,
6394 int opnum, int type,
6395 int ind_levels ATTRIBUTE_UNUSED)
6399 if (GET_CODE (x) == PLUS
6400 && GET_MODE_SIZE (mode) < 4
6401 && REG_P (XEXP (x, 0))
6402 && XEXP (x, 0) == stack_pointer_rtx
6403 && GET_CODE (XEXP (x, 1)) == CONST_INT
6404 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6409 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6410 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6414 /* If both registers are hi-regs, then it's better to reload the
6415 entire expression rather than each register individually. That
6416 only requires one reload register rather than two. */
6417 if (GET_CODE (x) == PLUS
6418 && REG_P (XEXP (x, 0))
6419 && REG_P (XEXP (x, 1))
6420 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6421 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6426 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6427 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6434 /* Test for various thread-local symbols. */
6436 /* Return TRUE if X is a thread-local symbol. */
6439 arm_tls_symbol_p (rtx x)
6441 if (! TARGET_HAVE_TLS)
6444 if (GET_CODE (x) != SYMBOL_REF)
6447 return SYMBOL_REF_TLS_MODEL (x) != 0;
6450 /* Helper for arm_tls_referenced_p. */
6453 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6455 if (GET_CODE (*x) == SYMBOL_REF)
6456 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6458 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6459 TLS offsets, not real symbol references. */
6460 if (GET_CODE (*x) == UNSPEC
6461 && XINT (*x, 1) == UNSPEC_TLS)
6467 /* Return TRUE if X contains any TLS symbol references. */
6470 arm_tls_referenced_p (rtx x)
6472 if (! TARGET_HAVE_TLS)
6475 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6478 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6481 arm_cannot_force_const_mem (rtx x)
6485 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6487 split_const (x, &base, &offset);
6488 if (GET_CODE (base) == SYMBOL_REF
6489 && !offset_within_block_p (base, INTVAL (offset)))
6492 return arm_tls_referenced_p (x);
6495 #define REG_OR_SUBREG_REG(X) \
6496 (GET_CODE (X) == REG \
6497 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6499 #define REG_OR_SUBREG_RTX(X) \
6500 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6503 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6505 enum machine_mode mode = GET_MODE (x);
6519 return COSTS_N_INSNS (1);
6522 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6525 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6532 return COSTS_N_INSNS (2) + cycles;
6534 return COSTS_N_INSNS (1) + 16;
6537 return (COSTS_N_INSNS (1)
6538 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6539 + (GET_CODE (SET_DEST (x)) == MEM)));
6544 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6546 if (thumb_shiftable_const (INTVAL (x)))
6547 return COSTS_N_INSNS (2);
6548 return COSTS_N_INSNS (3);
6550 else if ((outer == PLUS || outer == COMPARE)
6551 && INTVAL (x) < 256 && INTVAL (x) > -256)
6553 else if ((outer == IOR || outer == XOR || outer == AND)
6554 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6555 return COSTS_N_INSNS (1);
6556 else if (outer == AND)
6559 /* This duplicates the tests in the andsi3 expander. */
6560 for (i = 9; i <= 31; i++)
6561 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6562 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6563 return COSTS_N_INSNS (2);
6565 else if (outer == ASHIFT || outer == ASHIFTRT
6566 || outer == LSHIFTRT)
6568 return COSTS_N_INSNS (2);
6574 return COSTS_N_INSNS (3);
6592 /* XXX another guess. */
6593 /* Memory costs quite a lot for the first word, but subsequent words
6594 load at the equivalent of a single insn each. */
6595 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6596 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6601 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6607 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6608 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6614 return total + COSTS_N_INSNS (1);
6616 /* Assume a two-shift sequence. Increase the cost slightly so
6617 we prefer actual shifts over an extend operation. */
6618 return total + 1 + COSTS_N_INSNS (2);
6626 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6628 enum machine_mode mode = GET_MODE (x);
6629 enum rtx_code subcode;
6631 enum rtx_code code = GET_CODE (x);
6637 /* Memory costs quite a lot for the first word, but subsequent words
6638 load at the equivalent of a single insn each. */
6639 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6646 if (TARGET_HARD_FLOAT && mode == SFmode)
6647 *total = COSTS_N_INSNS (2);
6648 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6649 *total = COSTS_N_INSNS (4);
6651 *total = COSTS_N_INSNS (20);
6655 if (GET_CODE (XEXP (x, 1)) == REG)
6656 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
6657 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6658 *total = rtx_cost (XEXP (x, 1), code, speed);
6664 *total += COSTS_N_INSNS (4);
6669 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6670 *total += rtx_cost (XEXP (x, 0), code, speed);
6673 *total += COSTS_N_INSNS (3);
6677 *total += COSTS_N_INSNS (1);
6678 /* Increase the cost of complex shifts because they aren't any faster,
6679 and they reduce dual-issue opportunities. */
6680 if (arm_tune_cortex_a9
6681 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6689 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6690 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6691 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6693 *total += rtx_cost (XEXP (x, 1), code, speed);
6697 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6698 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6700 *total += rtx_cost (XEXP (x, 0), code, speed);
6707 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6709 if (TARGET_HARD_FLOAT
6711 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6713 *total = COSTS_N_INSNS (1);
6714 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6715 && arm_const_double_rtx (XEXP (x, 0)))
6717 *total += rtx_cost (XEXP (x, 1), code, speed);
6721 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6722 && arm_const_double_rtx (XEXP (x, 1)))
6724 *total += rtx_cost (XEXP (x, 0), code, speed);
6730 *total = COSTS_N_INSNS (20);
6734 *total = COSTS_N_INSNS (1);
6735 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6736 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6738 *total += rtx_cost (XEXP (x, 1), code, speed);
6742 subcode = GET_CODE (XEXP (x, 1));
6743 if (subcode == ASHIFT || subcode == ASHIFTRT
6744 || subcode == LSHIFTRT
6745 || subcode == ROTATE || subcode == ROTATERT)
6747 *total += rtx_cost (XEXP (x, 0), code, speed);
6748 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6752 /* A shift as a part of RSB costs no more than RSB itself. */
6753 if (GET_CODE (XEXP (x, 0)) == MULT
6754 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6756 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6757 *total += rtx_cost (XEXP (x, 1), code, speed);
6762 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6764 *total += rtx_cost (XEXP (x, 0), code, speed);
6765 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6769 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6770 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6772 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6773 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6774 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6775 *total += COSTS_N_INSNS (1);
6783 if (code == PLUS && arm_arch6 && mode == SImode
6784 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6785 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6787 *total = COSTS_N_INSNS (1);
6788 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6790 *total += rtx_cost (XEXP (x, 1), code, speed);
6794 /* MLA: All arguments must be registers. We filter out
6795 multiplication by a power of two, so that we fall down into
6796 the code below. */
6797 if (GET_CODE (XEXP (x, 0)) == MULT
6798 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6800 /* The cost comes from the cost of the multiply. */
6804 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6806 if (TARGET_HARD_FLOAT
6808 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6810 *total = COSTS_N_INSNS (1);
6811 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6812 && arm_const_double_rtx (XEXP (x, 1)))
6814 *total += rtx_cost (XEXP (x, 0), code, speed);
6821 *total = COSTS_N_INSNS (20);
6825 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6826 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6828 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6829 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6830 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6831 *total += COSTS_N_INSNS (1);
6837 case AND: case XOR: case IOR:
6839 /* Normally the frame registers will be split into reg+const during
6840 reload, so it is a bad idea to combine them with other instructions,
6841 since then they might not be moved outside of loops. As a compromise
6842 we allow integration with ops that have a constant as their second
6843 operand. */
6844 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6845 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6846 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6847 *total = COSTS_N_INSNS (1);
6851 *total += COSTS_N_INSNS (2);
6852 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6853 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6855 *total += rtx_cost (XEXP (x, 0), code, speed);
6862 *total += COSTS_N_INSNS (1);
6863 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6864 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6866 *total += rtx_cost (XEXP (x, 0), code, speed);
6869 subcode = GET_CODE (XEXP (x, 0));
6870 if (subcode == ASHIFT || subcode == ASHIFTRT
6871 || subcode == LSHIFTRT
6872 || subcode == ROTATE || subcode == ROTATERT)
6874 *total += rtx_cost (XEXP (x, 1), code, speed);
6875 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6880 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6882 *total += rtx_cost (XEXP (x, 1), code, speed);
6883 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6887 if (subcode == UMIN || subcode == UMAX
6888 || subcode == SMIN || subcode == SMAX)
6890 *total = COSTS_N_INSNS (3);
6897 /* This should have been handled by the CPU specific routines. */
6901 if (arm_arch3m && mode == SImode
6902 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6903 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6904 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6905 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6906 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6907 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6909 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6912 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
6916 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6918 if (TARGET_HARD_FLOAT
6920 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6922 *total = COSTS_N_INSNS (1);
6925 *total = COSTS_N_INSNS (2);
6931 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6932 if (mode == SImode && code == NOT)
6934 subcode = GET_CODE (XEXP (x, 0));
6935 if (subcode == ASHIFT || subcode == ASHIFTRT
6936 || subcode == LSHIFTRT
6937 || subcode == ROTATE || subcode == ROTATERT
6939 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6941 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6942 /* Register shifts cost an extra cycle. */
6943 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6944 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6953 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6955 *total = COSTS_N_INSNS (4);
6959 operand = XEXP (x, 0);
6961 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6962 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6963 && GET_CODE (XEXP (operand, 0)) == REG
6964 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6965 *total += COSTS_N_INSNS (1);
6966 *total += (rtx_cost (XEXP (x, 1), code, speed)
6967 + rtx_cost (XEXP (x, 2), code, speed));
6971 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6973 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6979 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6980 && mode == SImode && XEXP (x, 1) == const0_rtx)
6982 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6988 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6989 && mode == SImode && XEXP (x, 1) == const0_rtx)
6991 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7011 /* SCC insns. If the comparison has already been performed, they
7012 cost 2 instructions. Otherwise they need an additional comparison
7013 before them. */
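/* One possible sequence, for illustration: with the flags already
   set, "r0 = (r1 != 0)" can be
       movne r0, #1
       moveq r0, #0
   i.e. two instructions, plus a cmp when the comparison result is
   not yet available. */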
7014 *total = COSTS_N_INSNS (2);
7015 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7022 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7028 *total += COSTS_N_INSNS (1);
7029 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7030 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7032 *total += rtx_cost (XEXP (x, 0), code, speed);
7036 subcode = GET_CODE (XEXP (x, 0));
7037 if (subcode == ASHIFT || subcode == ASHIFTRT
7038 || subcode == LSHIFTRT
7039 || subcode == ROTATE || subcode == ROTATERT)
7041 *total += rtx_cost (XEXP (x, 1), code, speed);
7042 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7047 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7049 *total += rtx_cost (XEXP (x, 1), code, speed);
7050 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7060 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7061 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7062 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7063 *total += rtx_cost (XEXP (x, 1), code, speed);
7067 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7069 if (TARGET_HARD_FLOAT
7071 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7073 *total = COSTS_N_INSNS (1);
7076 *total = COSTS_N_INSNS (20);
7079 *total = COSTS_N_INSNS (1);
7081 *total += COSTS_N_INSNS (3);
7087 if (GET_MODE_CLASS (mode) == MODE_INT)
7089 rtx op = XEXP (x, 0);
7090 enum machine_mode opmode = GET_MODE (op);
7093 *total += COSTS_N_INSNS (1);
7095 if (opmode != SImode)
7099 /* If !arm_arch4, we use one of the extendhisi2_mem
7100 or movhi_bytes patterns for HImode. For a QImode
7101 sign extension, we first zero-extend from memory
7102 and then perform a shift sequence. */
7103 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7104 *total += COSTS_N_INSNS (2);
7107 *total += COSTS_N_INSNS (1);
7109 /* We don't have the necessary insn, so we need to perform some
7110 other operation. */
7111 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7112 /* An and with constant 255. */
7113 *total += COSTS_N_INSNS (1);
7115 /* A shift sequence. Increase costs slightly to avoid
7116 combining two shifts into an extend operation. */
7117 *total += COSTS_N_INSNS (2) + 1;
7123 switch (GET_MODE (XEXP (x, 0)))
7130 *total = COSTS_N_INSNS (1);
7140 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7144 if (const_ok_for_arm (INTVAL (x))
7145 || const_ok_for_arm (~INTVAL (x)))
7146 *total = COSTS_N_INSNS (1);
7148 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7149 INTVAL (x), NULL_RTX,
7156 *total = COSTS_N_INSNS (3);
7160 *total = COSTS_N_INSNS (1);
7164 *total = COSTS_N_INSNS (1);
7165 *total += rtx_cost (XEXP (x, 0), code, speed);
7169 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7170 && (mode == SFmode || !TARGET_VFP_SINGLE))
7171 *total = COSTS_N_INSNS (1);
7173 *total = COSTS_N_INSNS (4);
7177 *total = COSTS_N_INSNS (4);
7182 /* Estimates the size cost of thumb1 instructions.
7183 For now most of the code is copied from thumb1_rtx_costs. We need more
7184 finer-grained tuning when we have more related test cases. */
7186 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7188 enum machine_mode mode = GET_MODE (x);
7201 return COSTS_N_INSNS (1);
7204 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7206 /* The Thumb-1 mul instruction can't operate on a constant; we must
7207 load it into a register first. */
7208 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7209 return COSTS_N_INSNS (1) + const_size;
7211 return COSTS_N_INSNS (1);
7214 return (COSTS_N_INSNS (1)
7215 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7216 + (GET_CODE (SET_DEST (x)) == MEM)));
7221 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7222 return COSTS_N_INSNS (1);
7223 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7224 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7225 return COSTS_N_INSNS (2);
7226 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7227 if (thumb_shiftable_const (INTVAL (x)))
7228 return COSTS_N_INSNS (2);
7229 return COSTS_N_INSNS (3);
7231 else if ((outer == PLUS || outer == COMPARE)
7232 && INTVAL (x) < 256 && INTVAL (x) > -256)
7234 else if ((outer == IOR || outer == XOR || outer == AND)
7235 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7236 return COSTS_N_INSNS (1);
7237 else if (outer == AND)
7240 /* This duplicates the tests in the andsi3 expander. */
7241 for (i = 9; i <= 31; i++)
7242 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7243 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7244 return COSTS_N_INSNS (2);
7246 else if (outer == ASHIFT || outer == ASHIFTRT
7247 || outer == LSHIFTRT)
7249 return COSTS_N_INSNS (2);
7255 return COSTS_N_INSNS (3);
7273 /* XXX another guess. */
7274 /* Memory costs quite a lot for the first word, but subsequent words
7275 load at the equivalent of a single insn each. */
7276 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7277 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7282 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7287 /* XXX still guessing. */
7288 switch (GET_MODE (XEXP (x, 0)))
7291 return (1 + (mode == DImode ? 4 : 0)
7292 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7295 return (4 + (mode == DImode ? 4 : 0)
7296 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7299 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7310 /* RTX costs when optimizing for size. */
7312 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7315 enum machine_mode mode = GET_MODE (x);
7318 *total = thumb1_size_rtx_costs (x, code, outer_code);
7322 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7326 /* A memory access costs 1 insn if the mode is small, or the address is
7327 a single register, otherwise it costs one insn per word. */
7328 if (REG_P (XEXP (x, 0)))
7329 *total = COSTS_N_INSNS (1);
7331 && GET_CODE (XEXP (x, 0)) == PLUS
7332 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7333 /* This will be split into two instructions.
7334 See arm.md:calculate_pic_address. */
7335 *total = COSTS_N_INSNS (2);
7337 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7344 /* Needs a libcall, so it costs about this. */
7345 *total = COSTS_N_INSNS (2);
7349 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7351 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7359 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7361 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7364 else if (mode == SImode)
7366 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7367 /* Slightly disparage register shifts, but not by much. */
7368 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7369 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7373 /* Needs a libcall. */
7374 *total = COSTS_N_INSNS (2);
7378 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7379 && (mode == SFmode || !TARGET_VFP_SINGLE))
7381 *total = COSTS_N_INSNS (1);
7387 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7388 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7390 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7391 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7392 || subcode1 == ROTATE || subcode1 == ROTATERT
7393 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7394 || subcode1 == ASHIFTRT)
7396 /* It's just the cost of the two operands. */
7401 *total = COSTS_N_INSNS (1);
7405 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7409 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7410 && (mode == SFmode || !TARGET_VFP_SINGLE))
7412 *total = COSTS_N_INSNS (1);
7416 /* A shift as a part of ADD costs nothing. */
7417 if (GET_CODE (XEXP (x, 0)) == MULT
7418 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7420 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7421 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7422 *total += rtx_cost (XEXP (x, 1), code, false);
7427 case AND: case XOR: case IOR:
7430 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7432 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7433 || subcode == LSHIFTRT || subcode == ASHIFTRT
7434 || (code == AND && subcode == NOT))
7436 /* It's just the cost of the two operands. */
7442 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7446 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7450 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7451 && (mode == SFmode || !TARGET_VFP_SINGLE))
7453 *total = COSTS_N_INSNS (1);
7459 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7468 if (cc_register (XEXP (x, 0), VOIDmode))
7471 *total = COSTS_N_INSNS (1);
7475 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7476 && (mode == SFmode || !TARGET_VFP_SINGLE))
7477 *total = COSTS_N_INSNS (1);
7479 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7484 return arm_rtx_costs_1 (x, outer_code, total, 0);
7487 if (const_ok_for_arm (INTVAL (x)))
7488 /* A multiplication by a constant requires another instruction
7489 to load the constant to a register. */
7490 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7492 else if (const_ok_for_arm (~INTVAL (x)))
7493 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7494 else if (const_ok_for_arm (-INTVAL (x)))
7496 if (outer_code == COMPARE || outer_code == PLUS
7497 || outer_code == MINUS)
7500 *total = COSTS_N_INSNS (1);
7503 *total = COSTS_N_INSNS (2);
7509 *total = COSTS_N_INSNS (2);
7513 *total = COSTS_N_INSNS (4);
7518 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7519 cost of these slightly. */
7520 *total = COSTS_N_INSNS (1) + 1;
7524 if (mode != VOIDmode)
7525 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7527 *total = COSTS_N_INSNS (4); /* Who knows? */
7532 /* RTX costs. Dispatches to arm_size_rtx_costs when optimizing for size, otherwise to the current tuning's rtx_costs hook. */
7534 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7538 return arm_size_rtx_costs (x, (enum rtx_code) code,
7539 (enum rtx_code) outer_code, total);
7541 return current_tune->rtx_costs (x, (enum rtx_code) code,
7542 (enum rtx_code) outer_code,
7546 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7547 supported on any "slowmul" cores, so it can be ignored. */
7550 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7551 int *total, bool speed)
7553 enum machine_mode mode = GET_MODE (x);
7557 *total = thumb1_rtx_costs (x, code, outer_code);
7564 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7567 *total = COSTS_N_INSNS (20);
7571 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7573 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7574 & (unsigned HOST_WIDE_INT) 0xffffffff);
7575 int cost, const_ok = const_ok_for_arm (i);
7576 int j, booth_unit_size;
7578 /* Tune as appropriate. */
7579 cost = const_ok ? 4 : 8;
7580 booth_unit_size = 2;
7581 for (j = 0; i && j < 32; j += booth_unit_size)
7583 i >>= booth_unit_size;
7587 *total = COSTS_N_INSNS (cost);
7588 *total += rtx_cost (XEXP (x, 0), code, speed);
7592 *total = COSTS_N_INSNS (20);
7596 return arm_rtx_costs_1 (x, outer_code, total, speed);
7601 /* RTX cost for cores with a fast multiply unit (M variants). */
7604 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7605 int *total, bool speed)
7607 enum machine_mode mode = GET_MODE (x);
7611 *total = thumb1_rtx_costs (x, code, outer_code);
7615 /* ??? Should Thumb-2 use different costs? */
7619 /* There is no point basing this on the tuning, since it is always the
7620 fast variant if it exists at all. */
7622 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7623 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7624 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7626 *total = COSTS_N_INSNS (2);
7633 *total = COSTS_N_INSNS (5);
7637 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7639 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7640 & (unsigned HOST_WIDE_INT) 0xffffffff);
7641 int cost, const_ok = const_ok_for_arm (i);
7642 int j, booth_unit_size;
7644 /* Tune as appropriate. */
7645 cost = const_ok ? 4 : 8;
7646 booth_unit_size = 8;
7647 for (j = 0; i && j < 32; j += booth_unit_size)
7649 i >>= booth_unit_size;
7653 *total = COSTS_N_INSNS (cost);
7659 *total = COSTS_N_INSNS (4);
7663 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7665 if (TARGET_HARD_FLOAT
7667 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7669 *total = COSTS_N_INSNS (1);
7674 /* Requires a lib call. */
7675 *total = COSTS_N_INSNS (20);
7679 return arm_rtx_costs_1 (x, outer_code, total, speed);
7684 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7685 so it can be ignored. */
7688 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7689 int *total, bool speed)
7691 enum machine_mode mode = GET_MODE (x);
7695 *total = thumb1_rtx_costs (x, code, outer_code);
7702 if (GET_CODE (XEXP (x, 0)) != MULT)
7703 return arm_rtx_costs_1 (x, outer_code, total, speed);
7705 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7706 will stall until the multiplication is complete. */
7707 *total = COSTS_N_INSNS (3);
7711 /* There is no point basing this on the tuning, since it is always the
7712 fast variant if it exists at all. */
7714 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7715 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7716 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7718 *total = COSTS_N_INSNS (2);
7725 *total = COSTS_N_INSNS (5);
7729 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7731 /* If operand 1 is a constant we can more accurately
7732 calculate the cost of the multiply. The multiplier can
7733 retire 15 bits on the first cycle and a further 12 on the
7734 second. We do, of course, have to load the constant into
7735 a register first. */
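/* Illustrative instance (constant invented): 0x12345 sets bits above
   bit 15 but none in bits 27-31, so the masks below model one extra
   cycle on top of the general overhead, i.e. cost = 2. */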
7736 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7737 /* There's a general overhead of one cycle. */
7739 unsigned HOST_WIDE_INT masked_const;
7744 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7746 masked_const = i & 0xffff8000;
7747 if (masked_const != 0)
7750 masked_const = i & 0xf8000000;
7751 if (masked_const != 0)
7754 *total = COSTS_N_INSNS (cost);
7760 *total = COSTS_N_INSNS (3);
7764 /* Requires a lib call. */
7765 *total = COSTS_N_INSNS (20);
7769 return arm_rtx_costs_1 (x, outer_code, total, speed);
7774 /* RTX costs for 9e (and later) cores. */
7777 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7778 int *total, bool speed)
7780 enum machine_mode mode = GET_MODE (x);
7787 *total = COSTS_N_INSNS (3);
7791 *total = thumb1_rtx_costs (x, code, outer_code);
7799 /* There is no point basing this on the tuning, since it is always the
7800 fast variant if it exists at all. */
7802 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7803 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7804 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7806 *total = COSTS_N_INSNS (2);
7813 *total = COSTS_N_INSNS (5);
7819 *total = COSTS_N_INSNS (2);
7823 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7825 if (TARGET_HARD_FLOAT
7827 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7829 *total = COSTS_N_INSNS (1);
7834 *total = COSTS_N_INSNS (20);
7838 return arm_rtx_costs_1 (x, outer_code, total, speed);
7841 /* All address computations that can be done are free, but rtx cost returns
7842 the same for practically all of them. So we weight the different types
7843 of address here in the order (most preferred first):
7844 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7846 arm_arm_address_cost (rtx x)
7848 enum rtx_code c = GET_CODE (x);
7850 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7852 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7857 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7860 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7870 arm_thumb_address_cost (rtx x)
7872 enum rtx_code c = GET_CODE (x);
7877 && GET_CODE (XEXP (x, 0)) == REG
7878 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7885 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7887 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7890 /* Adjust cost hook for XScale. */
7892 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7894 /* Some true dependencies can have a higher cost depending
7895 on precisely how certain input operands are used. */
7896 if (REG_NOTE_KIND (link) == 0
7897 && recog_memoized (insn) >= 0
7898 && recog_memoized (dep) >= 0)
7900 int shift_opnum = get_attr_shift (insn);
7901 enum attr_type attr_type = get_attr_type (dep);
7903 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7904 operand for INSN. If we have a shifted input operand and the
7905 instruction we depend on is another ALU instruction, then we may
7906 have to account for an additional stall. */
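/* A hedged illustration (register numbers invented): if
       add r3, r4, r5, lsl #2
   is followed by
       add r0, r1, r3, lsl #1
   the second insn's shifted input r3 is written by another ALU-shift
   insn, so this dependency costs an extra stall cycle. */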
7907 if (shift_opnum != 0
7908 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7910 rtx shifted_operand;
7913 /* Get the shifted operand. */
7914 extract_insn (insn);
7915 shifted_operand = recog_data.operand[shift_opnum];
7917 /* Iterate over all the operands in DEP. If we write an operand
7918 that overlaps with SHIFTED_OPERAND, then we have to increase the
7919 cost of this dependency. */
7920 extract_insn (dep);
7921 preprocess_constraints ();
7922 for (opno = 0; opno < recog_data.n_operands; opno++)
7924 /* We can ignore strict inputs. */
7925 if (recog_data.operand_type[opno] == OP_IN)
7928 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7940 /* Adjust cost hook for Cortex A9. */
7942 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7944 switch (REG_NOTE_KIND (link))
7951 case REG_DEP_OUTPUT:
7952 if (recog_memoized (insn) >= 0
7953 && recog_memoized (dep) >= 0)
7955 if (GET_CODE (PATTERN (insn)) == SET)
7958 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7960 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7962 enum attr_type attr_type_insn = get_attr_type (insn);
7963 enum attr_type attr_type_dep = get_attr_type (dep);
7965 /* By default all dependencies of the form
7966 s0 = s0 <op> s1
7967 s0 = s0 <op> s2
7968 have an extra latency of 1 cycle because
7969 of the input and output dependency in this
7970 case. However this gets modeled as a true
7971 dependency and hence all these checks. */
7972 if (REG_P (SET_DEST (PATTERN (insn)))
7973 && REG_P (SET_DEST (PATTERN (dep)))
7974 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7975 SET_DEST (PATTERN (dep))))
7977 /* FMACS is a special case where the dependent
7978 instruction can be issued 3 cycles before
7979 the normal latency in case of an output
7980 dependency. */
7981 if ((attr_type_insn == TYPE_FMACS
7982 || attr_type_insn == TYPE_FMACD)
7983 && (attr_type_dep == TYPE_FMACS
7984 || attr_type_dep == TYPE_FMACD))
7986 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7987 *cost = insn_default_latency (dep) - 3;
7989 *cost = insn_default_latency (dep);
7994 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7995 *cost = insn_default_latency (dep) + 1;
7997 *cost = insn_default_latency (dep);
8013 /* Adjust cost hook for FA726TE. */
8015 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8017 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn
8018 followed by a predicated one) has a penalty of 3. */
8019 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8020 && recog_memoized (insn) >= 0
8021 && recog_memoized (dep) >= 0
8022 && get_attr_conds (dep) == CONDS_SET)
8024 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8025 if (get_attr_conds (insn) == CONDS_USE
8026 && get_attr_type (insn) != TYPE_BRANCH)
8032 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8033 || get_attr_conds (insn) == CONDS_USE)
8043 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8044 It corrects the value of COST based on the relationship between
8045 INSN and DEP through the dependence LINK. It returns the new
8046 value. There is a per-core adjust_cost hook to adjust scheduler costs
8047 and the per-core hook can choose to completely override the generic
8048 adjust_cost function. Only put bits of code into arm_adjust_cost that
8049 are common across all cores. */
8051 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8055 /* When generating Thumb-1 code, we want to place flag-setting operations
8056 close to a conditional branch which depends on them, so that we can
8057 omit the comparison. */
8059 && REG_NOTE_KIND (link) == 0
8060 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8061 && recog_memoized (dep) >= 0
8062 && get_attr_conds (dep) == CONDS_SET)
8065 if (current_tune->sched_adjust_cost != NULL)
8067 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8071 /* XXX This is not strictly true for the FPA. */
8072 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8073 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8076 /* Call insns don't incur a stall, even if they follow a load. */
8077 if (REG_NOTE_KIND (link) == 0
8078 && GET_CODE (insn) == CALL_INSN)
8081 if ((i_pat = single_set (insn)) != NULL
8082 && GET_CODE (SET_SRC (i_pat)) == MEM
8083 && (d_pat = single_set (dep)) != NULL
8084 && GET_CODE (SET_DEST (d_pat)) == MEM)
8086 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8087 /* This is a load after a store; there is no conflict if the load reads
8088 from a cached area. Assume that loads from the stack, and from the
8089 constant pool are cached, and that others will miss. This is a
8090 hack. */
8092 if ((GET_CODE (src_mem) == SYMBOL_REF
8093 && CONSTANT_POOL_ADDRESS_P (src_mem))
8094 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8095 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8096 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8103 static int fp_consts_inited = 0;
8105 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8106 static const char * const strings_fp[8] =
8109 "4", "5", "0.5", "10"
8112 static REAL_VALUE_TYPE values_fp[8];
8115 init_fp_table (void)
8120 if (TARGET_VFP)
8121 fp_consts_inited = 1;
8122 else
8123 fp_consts_inited = 8;
8125 for (i = 0; i < fp_consts_inited; i++)
8127 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8132 /* Return TRUE if rtx X is a valid immediate FP constant. */
8134 arm_const_double_rtx (rtx x)
8139 if (!fp_consts_inited)
8142 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8143 if (REAL_VALUE_MINUS_ZERO (r))
8146 for (i = 0; i < fp_consts_inited; i++)
8147 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8153 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8155 neg_const_double_rtx_ok_for_fpa (rtx x)
8160 if (!fp_consts_inited)
8163 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8164 r = real_value_negate (&r);
8165 if (REAL_VALUE_MINUS_ZERO (r))
8168 for (i = 0; i < 8; i++)
8169 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8176 /* VFPv3 has a fairly wide range of representable immediates, formed from
8177 "quarter-precision" floating-point values. These can be evaluated using this
8178 formula (with ^ for exponentiation):
8180 valid = (-1)^s * n * 2^-r
8182 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8183 16 <= n <= 31 and 0 <= r <= 7.
8185 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8187 - A (most-significant) is the sign bit.
8188 - BCD are the exponent (encoded as r XOR 3).
8189 - EFGH are the mantissa (encoded as n - 16).
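/* Worked example (added for exposition): 1.0 = 16 * 2^-4, i.e.
   s = 0, n = 16, r = 4, which the encoding below maps to
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70. */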
8192 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8193 fconst[sd] instruction, or -1 if X isn't suitable. */
8195 vfp3_const_double_index (rtx x)
8197 REAL_VALUE_TYPE r, m;
8199 unsigned HOST_WIDE_INT mantissa, mant_hi;
8200 unsigned HOST_WIDE_INT mask;
8201 HOST_WIDE_INT m1, m2;
8202 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8204 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8207 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8209 /* We can't represent these things, so detect them first. */
8210 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8213 /* Extract sign, exponent and mantissa. */
8214 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8215 r = real_value_abs (&r);
8216 exponent = REAL_EXP (&r);
8217 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8218 highest (sign) bit, with a fixed binary point at bit point_pos.
8219 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8220 bits for the mantissa, this may fail (low bits would be lost). */
8221 real_ldexp (&m, &r, point_pos - exponent);
8222 REAL_VALUE_TO_INT (&m1, &m2, m);
8226 /* If there are bits set in the low part of the mantissa, we can't
8227 represent this value. */
8231 /* Now make it so that mantissa contains the most-significant bits, and move
8232 the point_pos to indicate that the least-significant bits have been
8233 discarded. */
8234 point_pos -= HOST_BITS_PER_WIDE_INT;
8237 /* We can permit four significant bits of mantissa only, plus a high bit
8238 which is always 1. */
8239 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8240 if ((mantissa & mask) != 0)
8243 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8244 mantissa >>= point_pos - 5;
8246 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8247 floating-point immediate zero with Neon using an integer-zero load, but
8248 that case is handled elsewhere.) */
8252 gcc_assert (mantissa >= 16 && mantissa <= 31);
8254 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8255 normalized significands are in the range [1, 2). (Our mantissa is shifted
8256 left 4 places at this point relative to normalized IEEE754 values). GCC
8257 internally uses [0.5, 1) (see real.c), so the exponent returned from
8258 REAL_EXP must be altered. */
8259 exponent = 5 - exponent;
8261 if (exponent < 0 || exponent > 7)
8264 /* Sign, mantissa and exponent are now in the correct form to plug into the
8265 formula described in the comment above. */
8266 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8269 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8271 vfp3_const_double_rtx (rtx x)
8276 return vfp3_const_double_index (x) != -1;
8279 /* Recognize immediates which can be used in various Neon instructions. Legal
8280 immediates are described by the following table (for VMVN variants, the
8281 bitwise inverse of the constant shown is recognized; in either case, VMOV
8282 is output and the correct instruction to use for a given constant is chosen
8283 by the assembler). The constant shown is replicated across all elements of
8284 the destination vector.
8286 insn elems variant constant (binary)
8287 ---- ----- ------- -----------------
8288 vmov i32 0 00000000 00000000 00000000 abcdefgh
8289 vmov i32 1 00000000 00000000 abcdefgh 00000000
8290 vmov i32 2 00000000 abcdefgh 00000000 00000000
8291 vmov i32 3 abcdefgh 00000000 00000000 00000000
8292 vmov i16 4 00000000 abcdefgh
8293 vmov i16 5 abcdefgh 00000000
8294 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8295 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8296 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8297 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8298 vmvn i16 10 00000000 abcdefgh
8299 vmvn i16 11 abcdefgh 00000000
8300 vmov i32 12 00000000 00000000 abcdefgh 11111111
8301 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8302 vmov i32 14 00000000 abcdefgh 11111111 11111111
8303 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8304 vmov i8 16 abcdefgh
8305 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8306 eeeeeeee ffffffff gggggggg hhhhhhhh
8307 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8309 For case 18, B = !b. Representable values are exactly those accepted by
8310 vfp3_const_double_index, but are output as floating-point numbers rather
8311 than integers.
8313 Variants 0-5 (inclusive) may also be used as immediates for the second
8314 operand of VORR/VBIC instructions.
8316 The INVERSE argument causes the bitwise inverse of the given operand to be
8317 recognized instead (used for recognizing legal immediates for the VAND/VORN
8318 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8319 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8320 output, rather than the real insns vbic/vorr).
8322 INVERSE makes no difference to the recognition of float vectors.
8324 The return value is the variant of immediate as shown in the above table, or
8325 -1 if the given value doesn't match any of the listed patterns.
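/* Illustrative example (added for exposition): splatting the constant
   0x45 across a V4SI vector gives the little-endian byte pattern
   45 00 00 00 in each element, which matches variant 0 above with
   abcdefgh = 0x45 and an element width of 32. */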
8328 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8329 rtx *modconst, int *elementwidth)
8331 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8333 for (i = 0; i < idx; i += (STRIDE)) \
8338 immtype = (CLASS); \
8339 elsize = (ELSIZE); \
8343 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8344 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8345 unsigned char bytes[16];
8346 int immtype = -1, matches;
8347 unsigned int invmask = inverse ? 0xff : 0;
8349 /* Vectors of float constants. */
8350 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8352 rtx el0 = CONST_VECTOR_ELT (op, 0);
8355 if (!vfp3_const_double_rtx (el0))
8358 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8360 for (i = 1; i < n_elts; i++)
8362 rtx elt = CONST_VECTOR_ELT (op, i);
8365 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8367 if (!REAL_VALUES_EQUAL (r0, re))
8372 *modconst = CONST_VECTOR_ELT (op, 0);
8380 /* Splat vector constant out into a byte vector. */
8381 for (i = 0; i < n_elts; i++)
8383 rtx el = CONST_VECTOR_ELT (op, i);
8384 unsigned HOST_WIDE_INT elpart;
8385 unsigned int part, parts;
8387 if (GET_CODE (el) == CONST_INT)
8389 elpart = INTVAL (el);
8392 else if (GET_CODE (el) == CONST_DOUBLE)
8394 elpart = CONST_DOUBLE_LOW (el);
8400 for (part = 0; part < parts; part++)
8403 for (byte = 0; byte < innersize; byte++)
8405 bytes[idx++] = (elpart & 0xff) ^ invmask;
8406 elpart >>= BITS_PER_UNIT;
8408 if (GET_CODE (el) == CONST_DOUBLE)
8409 elpart = CONST_DOUBLE_HIGH (el);
8414 gcc_assert (idx == GET_MODE_SIZE (mode));
8418 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8419 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8421 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8422 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8424 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8425 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8427 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8428 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8430 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8432 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8434 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8435 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8437 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8438 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8440 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8441 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8443 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8444 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8446 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8448 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8450 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8451 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8453 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8454 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8456 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8457 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8459 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8460 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8462 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8464 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8465 && bytes[i] == bytes[(i + 8) % idx]);
8473 *elementwidth = elsize;
8477 unsigned HOST_WIDE_INT imm = 0;
8479 /* Un-invert bytes of recognized vector, if necessary. */
8481 for (i = 0; i < idx; i++)
8482 bytes[i] ^= invmask;
8486 /* FIXME: Broken on 32-bit H_W_I hosts. */
8487 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8489 for (i = 0; i < 8; i++)
8490 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8491 << (i * BITS_PER_UNIT);
8493 *modconst = GEN_INT (imm);
8497 unsigned HOST_WIDE_INT imm = 0;
8499 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8500 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8502 *modconst = GEN_INT (imm);
8510 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8511 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8512 float elements), and a modified constant (whatever should be output for a
8513 VMOV) in *MODCONST. */
8516 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8517 rtx *modconst, int *elementwidth)
8521 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8527 *modconst = tmpconst;
8530 *elementwidth = tmpwidth;
8535 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8536 the immediate is valid, write a constant suitable for using as an operand
8537 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8538 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8541 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8542 rtx *modconst, int *elementwidth)
8546 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8548 if (retval < 0 || retval > 5)
8552 *modconst = tmpconst;
8555 *elementwidth = tmpwidth;
8560 /* Return a string suitable for output of Neon immediate logic operation
8561 MNEM. */
8564 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8565 int inverse, int quad)
8567 int width, is_valid;
8568 static char templ[40];
8570 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8572 gcc_assert (is_valid != 0);
8575 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8577 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8582 /* Output a sequence of pairwise operations to implement a reduction.
8583 NOTE: We do "too much work" here, because pairwise operations work on two
8584 registers-worth of operands in one go. Unfortunately I don't think we
8585 can exploit those extra calculations to do the full operation in fewer steps.
8586 Although all vector elements of the result but the first are ignored, we
8587 actually calculate the same result in each of the elements. An alternative
8588 such as initially loading a vector with zero to use as each of the second
8589 operands would use up an additional register and take an extra instruction,
8590 for no particular gain. */
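/* For example (added for exposition), summing a 4-element vector
   takes two pairwise steps:
       {a, b, c, d}          ->  {a+b, c+d, a+b, c+d}
       {a+b, c+d, a+b, c+d}  ->  {a+b+c+d, ...}
   after which element 0 (like every other element) holds the full
   reduction. */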
8593 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8594 rtx (*reduc) (rtx, rtx, rtx))
8596 enum machine_mode inner = GET_MODE_INNER (mode);
8597 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8600 for (i = parts / 2; i >= 1; i /= 2)
8602 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8603 emit_insn (reduc (dest, tmpsum, tmpsum));
8608 /* If VALS is a vector constant that can be loaded into a register
8609 using VDUP, generate instructions to do so and return an RTX to
8610 assign to the register. Otherwise return NULL_RTX. */
8613 neon_vdup_constant (rtx vals)
8615 enum machine_mode mode = GET_MODE (vals);
8616 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8617 int n_elts = GET_MODE_NUNITS (mode);
8618 bool all_same = true;
8622 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8625 for (i = 0; i < n_elts; ++i)
8627 x = XVECEXP (vals, 0, i);
8628 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8633 /* The elements are not all the same. We could handle repeating
8634 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8635 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8639 /* We can load this constant by using VDUP and a constant in a
8640 single ARM register. This will be cheaper than a vector
8643 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8644 return gen_rtx_VEC_DUPLICATE (mode, x);
8647 /* Generate code to load VALS, which is a PARALLEL containing only
8648 constants (for vec_init) or CONST_VECTOR, efficiently into a
8649 register. Returns an RTX to copy into the register, or NULL_RTX
8650 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8653 neon_make_constant (rtx vals)
8655 enum machine_mode mode = GET_MODE (vals);
8657 rtx const_vec = NULL_RTX;
8658 int n_elts = GET_MODE_NUNITS (mode);
8662 if (GET_CODE (vals) == CONST_VECTOR)
8664 else if (GET_CODE (vals) == PARALLEL)
8666 /* A CONST_VECTOR must contain only CONST_INTs and
8667 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8668 Only store valid constants in a CONST_VECTOR. */
8669 for (i = 0; i < n_elts; ++i)
8671 rtx x = XVECEXP (vals, 0, i);
8672 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8675 if (n_const == n_elts)
8676 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8681 if (const_vec != NULL
8682 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8683 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8685 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8686 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8687 pipeline cycle; creating the constant takes one or two ARM
8688 pipeline cycles. */
8690 else if (const_vec != NULL_RTX)
8691 /* Load from constant pool. On Cortex-A8 this takes two cycles
8692 (for either double or quad vectors). We cannot take advantage
8693 of single-cycle VLD1 because we need a PC-relative addressing
8694 mode. */
8697 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8698 We cannot construct an initializer. */
8702 /* Initialize vector TARGET to VALS. */
8705 neon_expand_vector_init (rtx target, rtx vals)
8707 enum machine_mode mode = GET_MODE (target);
8708 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8709 int n_elts = GET_MODE_NUNITS (mode);
8710 int n_var = 0, one_var = -1;
8711 bool all_same = true;
8715 for (i = 0; i < n_elts; ++i)
8717 x = XVECEXP (vals, 0, i);
8718 if (!CONSTANT_P (x))
8719 ++n_var, one_var = i;
8721 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8727 rtx constant = neon_make_constant (vals);
8728 if (constant != NULL_RTX)
8730 emit_move_insn (target, constant);
8735 /* Splat a single non-constant element if we can. */
8736 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8738 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8739 emit_insn (gen_rtx_SET (VOIDmode, target,
8740 gen_rtx_VEC_DUPLICATE (mode, x)));
8744 /* One field is non-constant. Load constant then overwrite varying
8745 field. This is more efficient than using the stack. */
8748 rtx copy = copy_rtx (vals);
8749 rtx index = GEN_INT (one_var);
8751 /* Load constant part of vector, substitute neighboring value for
8752 varying element. */
8753 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8754 neon_expand_vector_init (target, copy);
8756 /* Insert variable. */
8757 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8761 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8764 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8767 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8770 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8773 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8776 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8779 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8782 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8785 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8793 /* Construct the vector in memory one field at a time
8794 and load the whole vector. */
8795 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8796 for (i = 0; i < n_elts; i++)
8797 emit_move_insn (adjust_address_nv (mem, inner_mode,
8798 i * GET_MODE_SIZE (inner_mode)),
8799 XVECEXP (vals, 0, i));
8800 emit_move_insn (target, mem);
8803 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8804 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8805 reported source locations are bogus. */
8808 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8813 gcc_assert (GET_CODE (operand) == CONST_INT);
8815 lane = INTVAL (operand);
8817 if (lane < low || lane >= high)
8821 /* Bounds-check lanes. */
8824 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8826 bounds_check (operand, low, high, "lane out of range");
8829 /* Bounds-check constants. */
8832 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8834 bounds_check (operand, low, high, "constant out of range");
8838 neon_element_bits (enum machine_mode mode)
8841 return GET_MODE_BITSIZE (mode);
8843 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8847 /* Predicates for `match_operand' and `match_operator'. */
8849 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8851 cirrus_memory_offset (rtx op)
8853 /* Reject eliminable registers. */
8854 if (! (reload_in_progress || reload_completed)
8855 && ( reg_mentioned_p (frame_pointer_rtx, op)
8856 || reg_mentioned_p (arg_pointer_rtx, op)
8857 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8858 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8859 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8860 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8863 if (GET_CODE (op) == MEM)
8869 /* Match: (mem (reg)). */
8870 if (GET_CODE (ind) == REG)
8876 if (GET_CODE (ind) == PLUS
8877 && GET_CODE (XEXP (ind, 0)) == REG
8878 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8879 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8886 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8887 WB is true if full writeback address modes are allowed and is false
8888 if limited writeback address modes (POST_INC and PRE_DEC) are
8889 allowed. */
8892 arm_coproc_mem_operand (rtx op, bool wb)
8896 /* Reject eliminable registers. */
8897 if (! (reload_in_progress || reload_completed)
8898 && ( reg_mentioned_p (frame_pointer_rtx, op)
8899 || reg_mentioned_p (arg_pointer_rtx, op)
8900 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8901 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8902 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8903 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8906 /* Constants are converted into offsets from labels. */
8907 if (GET_CODE (op) != MEM)
8912 if (reload_completed
8913 && (GET_CODE (ind) == LABEL_REF
8914 || (GET_CODE (ind) == CONST
8915 && GET_CODE (XEXP (ind, 0)) == PLUS
8916 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8917 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8920 /* Match: (mem (reg)). */
8921 if (GET_CODE (ind) == REG)
8922 return arm_address_register_rtx_p (ind, 0);
8924 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
8925 acceptable in any case (subject to verification by
8926 arm_address_register_rtx_p). We need WB to be true to accept
8927 PRE_INC and POST_DEC. */
8928 if (GET_CODE (ind) == POST_INC
8929 || GET_CODE (ind) == PRE_DEC
8931 && (GET_CODE (ind) == PRE_INC
8932 || GET_CODE (ind) == POST_DEC)))
8933 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8936 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8937 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8938 && GET_CODE (XEXP (ind, 1)) == PLUS
8939 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8940 ind = XEXP (ind, 1);
8945 if (GET_CODE (ind) == PLUS
8946 && GET_CODE (XEXP (ind, 0)) == REG
8947 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8948 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8949 && INTVAL (XEXP (ind, 1)) > -1024
8950 && INTVAL (XEXP (ind, 1)) < 1024
8951 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
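/* Illustrative summary of the address forms accepted above (assembly
   syntax is approximate):

	[rn]				plain register
	[rn], #imm / [rn, #imm]!	post/pre base update, per WB
	[rn, #imm]			imm a multiple of 4, -1020..+1020

   i.e. the reach of the scaled 8-bit offset field used by coprocessor
   load/store instructions.  */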
8957 /* Return TRUE if OP is a memory operand which we can load or store a vector
8958 to/from. TYPE is one of the following values:
8959 0 - Vector load/store (vldr)
8960 1 - Core registers (ldm)
8961 2 - Element/structure loads (vld1)
8964 neon_vector_mem_operand (rtx op, int type)
8968 /* Reject eliminable registers. */
8969 if (! (reload_in_progress || reload_completed)
8970 && ( reg_mentioned_p (frame_pointer_rtx, op)
8971 || reg_mentioned_p (arg_pointer_rtx, op)
8972 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8973 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8974 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8975 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8978 /* Constants are converted into offsets from labels. */
8979 if (GET_CODE (op) != MEM)
8984 if (reload_completed
8985 && (GET_CODE (ind) == LABEL_REF
8986 || (GET_CODE (ind) == CONST
8987 && GET_CODE (XEXP (ind, 0)) == PLUS
8988 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8989 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8992 /* Match: (mem (reg)). */
8993 if (GET_CODE (ind) == REG)
8994 return arm_address_register_rtx_p (ind, 0);
8996 /* Allow post-increment with Neon registers. */
8997 if ((type != 1 && GET_CODE (ind) == POST_INC)
8998 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8999 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9001 /* FIXME: vld1 allows register post-modify. */
9007 && GET_CODE (ind) == PLUS
9008 && GET_CODE (XEXP (ind, 0)) == REG
9009 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9010 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9011 && INTVAL (XEXP (ind, 1)) > -1024
9012 && INTVAL (XEXP (ind, 1)) < 1016
9013 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9019 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type.  */
9022 neon_struct_mem_operand (rtx op)
9026 /* Reject eliminable registers. */
9027 if (! (reload_in_progress || reload_completed)
9028 && ( reg_mentioned_p (frame_pointer_rtx, op)
9029 || reg_mentioned_p (arg_pointer_rtx, op)
9030 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9031 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9032 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9033 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9036 /* Constants are converted into offsets from labels. */
9037 if (GET_CODE (op) != MEM)
9042 if (reload_completed
9043 && (GET_CODE (ind) == LABEL_REF
9044 || (GET_CODE (ind) == CONST
9045 && GET_CODE (XEXP (ind, 0)) == PLUS
9046 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9047 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9050 /* Match: (mem (reg)). */
9051 if (GET_CODE (ind) == REG)
9052 return arm_address_register_rtx_p (ind, 0);
9057 /* Return true if X is a register that will be eliminated later on. */
9059 arm_eliminable_register (rtx x)
9061 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9062 || REGNO (x) == ARG_POINTER_REGNUM
9063 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9064 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
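/* For example, a stack slot addressed as (plus (reg fp) (const_int 8))
   before register elimination may become (plus (reg sp) (const_int N))
   with a much larger N afterwards; callers such as
   adjacent_mem_locations below therefore treat offsets from these
   registers conservatively.  */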
9067 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9068 coprocessor registers. Otherwise return NO_REGS. */
9071 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9075 if (!TARGET_NEON_FP16)
9076 return GENERAL_REGS;
9077 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9079 return GENERAL_REGS;
9083 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9084 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9085 && neon_vector_mem_operand (x, 0))
9088 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9091 return GENERAL_REGS;
9094 /* Values which must be returned in the most-significant end of the return register.  */
9098 arm_return_in_msb (const_tree valtype)
9100 return (TARGET_AAPCS_BASED
9102 && (AGGREGATE_TYPE_P (valtype)
9103 || TREE_CODE (valtype) == COMPLEX_TYPE));
9106 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9107 Used by the Cirrus Maverick code, which has to work around
9108 a hardware bug triggered by such instructions.  */
9110 arm_memory_load_p (rtx insn)
9112 rtx body, lhs, rhs;
9114 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9117 body = PATTERN (insn);
9119 if (GET_CODE (body) != SET)
9122 lhs = XEXP (body, 0);
9123 rhs = XEXP (body, 1);
9125 lhs = REG_OR_SUBREG_RTX (lhs);
9127 /* If the destination is not a general purpose
9128 register we do not have to worry. */
9129 if (GET_CODE (lhs) != REG
9130 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9133 /* As well as loads from memory we also have to react
9134 to loads of invalid constants which will be turned
9135 into loads from the minipool. */
9136 return (GET_CODE (rhs) == MEM
9137 || GET_CODE (rhs) == SYMBOL_REF
9138 || note_invalid_constants (insn, -1, false));
9141 /* Return TRUE if INSN is a Cirrus instruction. */
9143 arm_cirrus_insn_p (rtx insn)
9145 enum attr_cirrus attr;
9147 /* get_attr cannot accept USE or CLOBBER. */
9149 || GET_CODE (insn) != INSN
9150 || GET_CODE (PATTERN (insn)) == USE
9151 || GET_CODE (PATTERN (insn)) == CLOBBER)
9154 attr = get_attr_cirrus (insn);
9156 return attr != CIRRUS_NOT;
9159 /* Cirrus reorg for invalid instruction combinations. */
9161 cirrus_reorg (rtx first)
9163 enum attr_cirrus attr;
9164 rtx body = PATTERN (first);
9168 /* Any branch must be followed by 2 non-Cirrus instructions.  */
9169 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9172 t = next_nonnote_insn (first);
9174 if (arm_cirrus_insn_p (t))
9177 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9181 emit_insn_after (gen_nop (), first);
9186 /* (float (blah)) is in parallel with a clobber. */
9187 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9188 body = XVECEXP (body, 0, 0);
9190 if (GET_CODE (body) == SET)
9192 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9194 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9195 be followed by a non-Cirrus insn.  */
9196 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9198 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9199 emit_insn_after (gen_nop (), first);
9203 else if (arm_memory_load_p (first))
9205 unsigned int arm_regno;
9207 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9208 ldr/cfmv64hr combination where the Rd field is the same
9209 in both instructions must be split with a non-Cirrus insn.  */
9216 /* Get Arm register number for ldr insn. */
9217 if (GET_CODE (lhs) == REG)
9218 arm_regno = REGNO (lhs);
9221 gcc_assert (GET_CODE (rhs) == REG);
9222 arm_regno = REGNO (rhs);
9226 first = next_nonnote_insn (first);
9228 if (! arm_cirrus_insn_p (first))
9231 body = PATTERN (first);
9233 /* (float (blah)) is in parallel with a clobber. */
9234 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9235 body = XVECEXP (body, 0, 0);
9237 if (GET_CODE (body) == FLOAT)
9238 body = XEXP (body, 0);
9240 if (get_attr_cirrus (first) == CIRRUS_MOVE
9241 && GET_CODE (XEXP (body, 1)) == REG
9242 && arm_regno == REGNO (XEXP (body, 1)))
9243 emit_insn_after (gen_nop (), first);
9249 /* get_attr cannot accept USE or CLOBBER. */
9251 || GET_CODE (first) != INSN
9252 || GET_CODE (PATTERN (first)) == USE
9253 || GET_CODE (PATTERN (first)) == CLOBBER)
9256 attr = get_attr_cirrus (first);
9258 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9259 must be followed by a non-coprocessor instruction. */
9260 if (attr == CIRRUS_COMPARE)
9264 t = next_nonnote_insn (first);
9266 if (arm_cirrus_insn_p (t))
9269 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9273 emit_insn_after (gen_nop (), first);
9279 /* Return TRUE if X references a SYMBOL_REF. */
9281 symbol_mentioned_p (rtx x)
9286 if (GET_CODE (x) == SYMBOL_REF)
9289 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9290 are constant offsets, not symbols. */
9291 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9294 fmt = GET_RTX_FORMAT (GET_CODE (x));
9296 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9302 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9303 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9306 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9313 /* Return TRUE if X references a LABEL_REF. */
9315 label_mentioned_p (rtx x)
9320 if (GET_CODE (x) == LABEL_REF)
9323 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9324 instruction, but they are constant offsets, not symbols. */
9325 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9328 fmt = GET_RTX_FORMAT (GET_CODE (x));
9329 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9335 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9336 if (label_mentioned_p (XVECEXP (x, i, j)))
9339 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9347 tls_mentioned_p (rtx x)
9349 switch (GET_CODE (x))
9352 return tls_mentioned_p (XEXP (x, 0));
9355 if (XINT (x, 1) == UNSPEC_TLS)
9363 /* Must not copy any rtx that uses a pc-relative address. */
9366 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9368 if (GET_CODE (*x) == UNSPEC
9369 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9375 arm_cannot_copy_insn_p (rtx insn)
9377 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9383 enum rtx_code code = GET_CODE (x);
9400 /* Return 1 if memory locations are adjacent. */
9402 adjacent_mem_locations (rtx a, rtx b)
9404 /* We don't guarantee to preserve the order of these memory refs. */
9405 if (volatile_refs_p (a) || volatile_refs_p (b))
9408 if ((GET_CODE (XEXP (a, 0)) == REG
9409 || (GET_CODE (XEXP (a, 0)) == PLUS
9410 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9411 && (GET_CODE (XEXP (b, 0)) == REG
9412 || (GET_CODE (XEXP (b, 0)) == PLUS
9413 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9415 HOST_WIDE_INT val0 = 0, val1 = 0;
9419 if (GET_CODE (XEXP (a, 0)) == PLUS)
9421 reg0 = XEXP (XEXP (a, 0), 0);
9422 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9427 if (GET_CODE (XEXP (b, 0)) == PLUS)
9429 reg1 = XEXP (XEXP (b, 0), 0);
9430 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9435 /* Don't accept any offset that will require multiple
9436 instructions to handle, since this would cause the
9437 arith_adjacentmem pattern to output an overlong sequence. */
9438 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9441 /* Don't allow an eliminable register: register elimination can make
9442 the offset too large. */
9443 if (arm_eliminable_register (reg0))
9446 val_diff = val1 - val0;
9450 /* If the target has load delay slots, then there's no benefit
9451 to using an ldm instruction unless the offset is zero and
9452 we are optimizing for size. */
9453 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9454 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9455 && (val_diff == 4 || val_diff == -4));
9458 return ((REGNO (reg0) == REGNO (reg1))
9459 && (val_diff == 4 || val_diff == -4));
9465 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9466 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9467 instruction. ADD_OFFSET is nonzero if the base address register needs
9468 to be modified with an add instruction before we can use it. */
9471 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9472 int nops, HOST_WIDE_INT add_offset)
9474 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
9475 if the offset isn't small enough. The reason 2 ldrs are faster
9476 is because these ARMs are able to do more than one cache access
9477 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9478 whilst the ARM8 has a double bandwidth cache. This means that
9479 these cores can do both an instruction fetch and a data fetch in
9480 a single cycle, so the trick of calculating the address into a
9481 scratch register (one of the result regs) and then doing a load
9482 multiple actually becomes slower (and no smaller in code size).
9483 That is the transformation
9485 ldr rd1, [rbase + offset]
9486 ldr rd2, [rbase + offset + 4]
9490 add rd1, rbase, offset
9491 ldmia rd1, {rd1, rd2}
9493 produces worse code -- '3 cycles + any stalls on rd2' instead of
9494 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9495 access per cycle, the first sequence could never complete in less
9496 than 6 cycles, whereas the ldm sequence would only take 5 and
9497 would make better use of sequential accesses if not hitting the cache.
9500 We cheat here and test 'arm_ld_sched' which we currently know to
9501 only be true for the ARM8, ARM9 and StrongARM. If this ever
9502 changes, then the test below needs to be reworked. */
9503 if (nops == 2 && arm_ld_sched && add_offset != 0)
9506 /* XScale has load-store double instructions, but they have stricter
9507 alignment requirements than load-store multiple, so we cannot use them.
9510 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9511 the pipeline until completion.
9519 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9528 Best case ldr will always win. However, the more ldr instructions
9529 we issue, the less likely we are to be able to schedule them well.
9530 Using ldr instructions also increases code size.
9532 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9533 for counts of 3 or 4 regs. */
9534 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9539 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9540 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9541 an array ORDER which describes the sequence to use when accessing the
9542 offsets that produces an ascending order. In this sequence, each
9543 offset must be larger by exactly 4 than the previous one. ORDER[0]
9544 must have been filled in with the lowest offset by the caller.
9545 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9546 we use to verify that ORDER produces an ascending order of registers.
9547 Return true if it was possible to construct such an order, false if not.  */
9551 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9555 for (i = 1; i < nops; i++)
9559 order[i] = order[i - 1];
9560 for (j = 0; j < nops; j++)
9561 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9563 /* We must find exactly one offset that is higher than the
9564 previous one by 4. */
9565 if (order[i] != order[i - 1])
9569 if (order[i] == order[i - 1])
9571 /* The register numbers must be ascending. */
9572 if (unsorted_regs != NULL
9573 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
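/* Worked example: with NOPS = 3 and UNSORTED_OFFSETS = {8, 0, 4} the
   caller seeds ORDER[0] = 1 (the offset 0); the loop then selects index
   2 (offset 4) and index 0 (offset 8), giving ORDER = {1, 2, 0}.  For
   {0, 4, 12} no offset equal to 4 + 4 exists, so the function fails.  */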
9579 /* Used to determine in a peephole whether a sequence of load
9580 instructions can be changed into a load-multiple instruction.
9581 NOPS is the number of separate load instructions we are examining. The
9582 first NOPS entries in OPERANDS are the destination registers, the
9583 next NOPS entries are memory operands. If this function is
9584 successful, *BASE is set to the common base register of the memory
9585 accesses; *LOAD_OFFSET is set to the first memory location's offset
9586 from that base register.
9587 REGS is an array filled in with the destination register numbers.
9588 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9589 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9590 the sequence of registers in REGS matches the loads from ascending memory
9591 locations, and the function verifies that the register numbers are
9592 themselves ascending. If CHECK_REGS is false, the register numbers
9593 are stored in the order they are found in the operands. */
9595 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9596 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9598 int unsorted_regs[MAX_LDM_STM_OPS];
9599 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9600 int order[MAX_LDM_STM_OPS];
9601 rtx base_reg_rtx = NULL;
9605 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though this could be
9606 easily extended if required. */
9607 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9609 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9611 /* Loop over the operands and check that the memory references are
9612 suitable (i.e. immediate offsets from the same base register). At
9613 the same time, extract the target register, and the memory offsets.  */
9615 for (i = 0; i < nops; i++)
9620 /* Convert a subreg of a mem into the mem itself. */
9621 if (GET_CODE (operands[nops + i]) == SUBREG)
9622 operands[nops + i] = alter_subreg (operands + (nops + i));
9624 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9626 /* Don't reorder volatile memory references; it doesn't seem worth
9627 looking for the case where the order is ok anyway. */
9628 if (MEM_VOLATILE_P (operands[nops + i]))
9631 offset = const0_rtx;
9633 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9634 || (GET_CODE (reg) == SUBREG
9635 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9636 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9637 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9639 || (GET_CODE (reg) == SUBREG
9640 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9641 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9646 base_reg = REGNO (reg);
9648 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9651 else if (base_reg != (int) REGNO (reg))
9652 /* Not addressed from the same base register. */
9655 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9656 ? REGNO (operands[i])
9657 : REGNO (SUBREG_REG (operands[i])));
9659 /* If it isn't an integer register, or if it overwrites the
9660 base register but isn't the last insn in the list, then
9661 we can't do this. */
9662 if (unsorted_regs[i] < 0
9663 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9664 || unsorted_regs[i] > 14
9665 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9668 unsorted_offsets[i] = INTVAL (offset);
9669 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9673 /* Not a suitable memory address. */
9677 /* All the useful information has now been extracted from the
9678 operands into unsorted_regs and unsorted_offsets; additionally,
9679 order[0] has been set to the lowest offset in the list. Sort
9680 the offsets into order, verifying that they are adjacent, and
9681 check that the register numbers are ascending. */
9682 if (!compute_offset_order (nops, unsorted_offsets, order,
9683 check_regs ? unsorted_regs : NULL))
9687 memcpy (saved_order, order, sizeof order);
9693 for (i = 0; i < nops; i++)
9694 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9696 *load_offset = unsorted_offsets[order[0]];
9700 && !peep2_reg_dead_p (nops, base_reg_rtx))
9703 if (unsorted_offsets[order[0]] == 0)
9704 ldm_case = 1; /* ldmia */
9705 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9706 ldm_case = 2; /* ldmib */
9707 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9708 ldm_case = 3; /* ldmda */
9709 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9710 ldm_case = 4; /* ldmdb */
9711 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9712 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9717 if (!multiple_operation_profitable_p (false, nops,
9719 ? unsorted_offsets[order[0]] : 0))
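/* The LDM_CASE values chosen above map onto the classic ARM addressing
   variants; for two registers loaded from base rb (illustrative):

	case 1	ldmia rb, {r0, r1}	first offset  0
	case 2	ldmib rb, {r0, r1}	first offset  4
	case 3	ldmda rb, {r0, r1}	last offset   0
	case 4	ldmdb rb, {r0, r1}	last offset  -4

   Case 5 first materializes base + offset in a scratch register.  Only
   the ia/db forms exist in Thumb-2, hence the TARGET_ARM guards.  */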
9725 /* Used to determine in a peephole whether a sequence of store instructions can
9726 be changed into a store-multiple instruction.
9727 NOPS is the number of separate store instructions we are examining.
9728 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9730 The first NOPS entries in OPERANDS are the source registers, the next
9731 NOPS entries are memory operands. If this function is successful, *BASE is
9732 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9733 to the first memory location's offset from that base register. REGS is an
9734 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9735 likewise filled with the corresponding rtx's.
9736 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9737 numbers to an ascending order of stores.
9738 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9739 from ascending memory locations, and the function verifies that the register
9740 numbers are themselves ascending. If CHECK_REGS is false, the register
9741 numbers are stored in the order they are found in the operands. */
9743 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9744 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9745 HOST_WIDE_INT *load_offset, bool check_regs)
9747 int unsorted_regs[MAX_LDM_STM_OPS];
9748 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9749 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9750 int order[MAX_LDM_STM_OPS];
9752 rtx base_reg_rtx = NULL;
9755 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though this could be
9756 easily extended if required. */
9757 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9759 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9761 /* Loop over the operands and check that the memory references are
9762 suitable (i.e. immediate offsets from the same base register). At
9763 the same time, extract the target register, and the memory offsets.  */
9765 for (i = 0; i < nops; i++)
9770 /* Convert a subreg of a mem into the mem itself. */
9771 if (GET_CODE (operands[nops + i]) == SUBREG)
9772 operands[nops + i] = alter_subreg (operands + (nops + i));
9774 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9776 /* Don't reorder volatile memory references; it doesn't seem worth
9777 looking for the case where the order is ok anyway. */
9778 if (MEM_VOLATILE_P (operands[nops + i]))
9781 offset = const0_rtx;
9783 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9784 || (GET_CODE (reg) == SUBREG
9785 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9786 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9787 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9789 || (GET_CODE (reg) == SUBREG
9790 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9791 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9794 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9795 ? operands[i] : SUBREG_REG (operands[i]));
9796 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9800 base_reg = REGNO (reg);
9802 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9805 else if (base_reg != (int) REGNO (reg))
9806 /* Not addressed from the same base register. */
9809 /* If it isn't an integer register, then we can't do this. */
9810 if (unsorted_regs[i] < 0
9811 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9812 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9813 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9814 || unsorted_regs[i] > 14)
9817 unsorted_offsets[i] = INTVAL (offset);
9818 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9822 /* Not a suitable memory address. */
9826 /* All the useful information has now been extracted from the
9827 operands into unsorted_regs and unsorted_offsets; additionally,
9828 order[0] has been set to the lowest offset in the list. Sort
9829 the offsets into order, verifying that they are adjacent, and
9830 check that the register numbers are ascending. */
9831 if (!compute_offset_order (nops, unsorted_offsets, order,
9832 check_regs ? unsorted_regs : NULL))
9836 memcpy (saved_order, order, sizeof order);
9842 for (i = 0; i < nops; i++)
9844 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9846 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9849 *load_offset = unsorted_offsets[order[0]];
9853 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9856 if (unsorted_offsets[order[0]] == 0)
9857 stm_case = 1; /* stmia */
9858 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9859 stm_case = 2; /* stmib */
9860 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9861 stm_case = 3; /* stmda */
9862 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9863 stm_case = 4; /* stmdb */
9867 if (!multiple_operation_profitable_p (false, nops, 0))
9873 /* Routines for use in generating RTL. */
9875 /* Generate a load-multiple instruction. COUNT is the number of loads in
9876 the instruction; REGS and MEMS are arrays containing the operands.
9877 BASEREG is the base register to be used in addressing the memory operands.
9878 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9882 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9883 HOST_WIDE_INT wback_offset)
9888 if (!multiple_operation_profitable_p (false, count, 0))
9894 for (i = 0; i < count; i++)
9895 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9897 if (wback_offset != 0)
9898 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9906 result = gen_rtx_PARALLEL (VOIDmode,
9907 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9908 if (wback_offset != 0)
9910 XVECEXP (result, 0, 0)
9911 = gen_rtx_SET (VOIDmode, basereg,
9912 plus_constant (basereg, wback_offset));
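/* Note that I is not reset before the loop below: it is 0, or 1 when a
   writeback SET already occupies vector element 0, while J always
   indexes REGS/MEMS from the start.  */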
9917 for (j = 0; i < count; i++, j++)
9918 XVECEXP (result, 0, i)
9919 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9924 /* Generate a store-multiple instruction. COUNT is the number of stores in
9925 the instruction; REGS and MEMS are arrays containing the operands.
9926 BASEREG is the base register to be used in addressing the memory operands.
9927 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9931 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9932 HOST_WIDE_INT wback_offset)
9937 if (GET_CODE (basereg) == PLUS)
9938 basereg = XEXP (basereg, 0);
9940 if (!multiple_operation_profitable_p (false, count, 0))
9946 for (i = 0; i < count; i++)
9947 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9949 if (wback_offset != 0)
9950 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9958 result = gen_rtx_PARALLEL (VOIDmode,
9959 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9960 if (wback_offset != 0)
9962 XVECEXP (result, 0, 0)
9963 = gen_rtx_SET (VOIDmode, basereg,
9964 plus_constant (basereg, wback_offset));
9969 for (j = 0; i < count; i++, j++)
9970 XVECEXP (result, 0, i)
9971 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9976 /* Generate either a load-multiple or a store-multiple instruction. This
9977 function can be used in situations where we can start with a single MEM
9978 rtx and adjust its address upwards.
9979 COUNT is the number of operations in the instruction, not counting a
9980 possible update of the base register.  REGS is an array containing the register numbers to be used.
9982 BASEREG is the base register to be used in addressing the memory operands,
9983 which are constructed from BASEMEM.
9984 WRITE_BACK specifies whether the generated instruction should include an
9985 update of the base register.
9986 OFFSETP is used to pass an offset to and from this function; this offset
9987 is not used when constructing the address (instead BASEMEM should have an
9988 appropriate offset in its address), it is used only for setting
9989 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
9992 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9993 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9995 rtx mems[MAX_LDM_STM_OPS];
9996 HOST_WIDE_INT offset = *offsetp;
9999 gcc_assert (count <= MAX_LDM_STM_OPS);
10001 if (GET_CODE (basereg) == PLUS)
10002 basereg = XEXP (basereg, 0);
10004 for (i = 0; i < count; i++)
10006 rtx addr = plus_constant (basereg, i * 4);
10007 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10015 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10016 write_back ? 4 * count : 0);
10018 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10019 write_back ? 4 * count : 0);
10023 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10024 rtx basemem, HOST_WIDE_INT *offsetp)
10026 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10031 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10032 rtx basemem, HOST_WIDE_INT *offsetp)
10034 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10038 /* Called from a peephole2 expander to turn a sequence of loads into an
10039 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10040 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10041 is true if we can reorder the registers because they are subsequently used commutatively.
10043 Returns true iff we could generate a new instruction. */
10046 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10048 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10049 rtx mems[MAX_LDM_STM_OPS];
10050 int i, j, base_reg;
10052 HOST_WIDE_INT offset;
10053 int write_back = FALSE;
10057 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10058 &base_reg, &offset, !sort_regs);
10064 for (i = 0; i < nops - 1; i++)
10065 for (j = i + 1; j < nops; j++)
10066 if (regs[i] > regs[j])
10072 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10076 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10077 gcc_assert (ldm_case == 1 || ldm_case == 5);
10083 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10084 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10086 if (!TARGET_THUMB1)
10088 base_reg = regs[0];
10089 base_reg_rtx = newbase;
10093 for (i = 0; i < nops; i++)
10095 addr = plus_constant (base_reg_rtx, offset + i * 4);
10096 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10099 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10100 write_back ? offset + i * 4 : 0));
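/* As an illustration, the peephole driving this function can rewrite

	ldr r0, [r4]
	ldr r1, [r4, #4]

   into "ldmia r4, {r0, r1}", or, for ldm_case 5 with a dead base,

	add r0, r4, #OFFSET
	ldmia r0, {r0, r1}

   (sketch only; the real patterns operate on RTL, not assembly).  */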
10104 /* Called from a peephole2 expander to turn a sequence of stores into an
10105 STM instruction. OPERANDS are the operands found by the peephole matcher;
10106 NOPS indicates how many separate stores we are trying to combine.
10107 Returns true iff we could generate a new instruction. */
10110 gen_stm_seq (rtx *operands, int nops)
10113 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10114 rtx mems[MAX_LDM_STM_OPS];
10117 HOST_WIDE_INT offset;
10118 int write_back = FALSE;
10121 bool base_reg_dies;
10123 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10124 mem_order, &base_reg, &offset, true);
10129 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10131 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10134 gcc_assert (base_reg_dies);
10140 gcc_assert (base_reg_dies);
10141 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10145 addr = plus_constant (base_reg_rtx, offset);
10147 for (i = 0; i < nops; i++)
10149 addr = plus_constant (base_reg_rtx, offset + i * 4);
10150 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10153 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10154 write_back ? offset + i * 4 : 0));
10158 /* Called from a peephole2 expander to turn a sequence of stores that are
10159 preceded by constant loads into an STM instruction. OPERANDS are the
10160 operands found by the peephole matcher; NOPS indicates how many
10161 separate stores we are trying to combine; there are 2 * NOPS
10162 instructions in the peephole.
10163 Returns true iff we could generate a new instruction. */
10166 gen_const_stm_seq (rtx *operands, int nops)
10168 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10169 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10170 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10171 rtx mems[MAX_LDM_STM_OPS];
10174 HOST_WIDE_INT offset;
10175 int write_back = FALSE;
10178 bool base_reg_dies;
10180 HARD_REG_SET allocated;
10182 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10183 mem_order, &base_reg, &offset, false);
10188 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10190 /* If the same register is used more than once, try to find a free register.  */
10192 CLEAR_HARD_REG_SET (allocated);
10193 for (i = 0; i < nops; i++)
10195 for (j = i + 1; j < nops; j++)
10196 if (regs[i] == regs[j])
10198 rtx t = peep2_find_free_register (0, nops * 2,
10199 TARGET_THUMB1 ? "l" : "r",
10200 SImode, &allocated);
10204 regs[i] = REGNO (t);
10208 /* Compute an ordering that maps the register numbers to an ascending sequence.  */
10211 for (i = 0; i < nops; i++)
10212 if (regs[i] < regs[reg_order[0]])
10215 for (i = 1; i < nops; i++)
10217 int this_order = reg_order[i - 1];
10218 for (j = 0; j < nops; j++)
10219 if (regs[j] > regs[reg_order[i - 1]]
10220 && (this_order == reg_order[i - 1]
10221 || regs[j] < regs[this_order]))
10223 reg_order[i] = this_order;
10226 /* Ensure that registers that must be live after the instruction end
10227 up with the correct value. */
10228 for (i = 0; i < nops; i++)
10230 int this_order = reg_order[i];
10231 if ((this_order != mem_order[i]
10232 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10233 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10237 /* Load the constants. */
10238 for (i = 0; i < nops; i++)
10240 rtx op = operands[2 * nops + mem_order[i]];
10241 sorted_regs[i] = regs[reg_order[i]];
10242 emit_move_insn (reg_rtxs[reg_order[i]], op);
10245 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10247 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10250 gcc_assert (base_reg_dies);
10256 gcc_assert (base_reg_dies);
10257 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10261 addr = plus_constant (base_reg_rtx, offset);
10263 for (i = 0; i < nops; i++)
10265 addr = plus_constant (base_reg_rtx, offset + i * 4);
10266 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10269 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10270 write_back ? offset + i * 4 : 0));
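/* Sketch of the intended transformation (illustrative): the insns

	mov r2, #0	str r2, [r4]
	mov r2, #1	str r2, [r4, #4]

   can become

	mov r2, #0
	mov r3, #1	@ r3 found by peep2_find_free_register
	stmia r4, {r2, r3}

   with the reused source register replaced as described above.  */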
10275 arm_gen_movmemqi (rtx *operands)
10277 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10278 HOST_WIDE_INT srcoffset, dstoffset;
10280 rtx src, dst, srcbase, dstbase;
10281 rtx part_bytes_reg = NULL;
10284 if (GET_CODE (operands[2]) != CONST_INT
10285 || GET_CODE (operands[3]) != CONST_INT
10286 || INTVAL (operands[2]) > 64
10287 || INTVAL (operands[3]) & 3)
10290 dstbase = operands[0];
10291 srcbase = operands[1];
10293 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10294 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10296 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10297 out_words_to_go = INTVAL (operands[2]) / 4;
10298 last_bytes = INTVAL (operands[2]) & 3;
10299 dstoffset = srcoffset = 0;
10301 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10302 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10304 for (i = 0; in_words_to_go >= 2; i += 4)
10306 if (in_words_to_go > 4)
10307 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10308 TRUE, srcbase, &srcoffset));
10310 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10311 src, FALSE, srcbase,
10314 if (out_words_to_go)
10316 if (out_words_to_go > 4)
10317 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10318 TRUE, dstbase, &dstoffset));
10319 else if (out_words_to_go != 1)
10320 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10321 out_words_to_go, dst,
10324 dstbase, &dstoffset));
10327 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10328 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10329 if (last_bytes != 0)
10331 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10337 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10338 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10341 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10342 if (out_words_to_go)
10346 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10347 sreg = copy_to_reg (mem);
10349 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10350 emit_move_insn (mem, sreg);
10353 gcc_assert (!in_words_to_go); /* Sanity check */
10356 if (in_words_to_go)
10358 gcc_assert (in_words_to_go > 0);
10360 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10361 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10364 gcc_assert (!last_bytes || part_bytes_reg);
10366 if (BYTES_BIG_ENDIAN && last_bytes)
10368 rtx tmp = gen_reg_rtx (SImode);
10370 /* The bytes we want are in the top end of the word. */
10371 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10372 GEN_INT (8 * (4 - last_bytes))));
10373 part_bytes_reg = tmp;
10377 mem = adjust_automodify_address (dstbase, QImode,
10378 plus_constant (dst, last_bytes - 1),
10379 dstoffset + last_bytes - 1);
10380 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10384 tmp = gen_reg_rtx (SImode);
10385 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10386 part_bytes_reg = tmp;
10393 if (last_bytes > 1)
10395 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10396 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10400 rtx tmp = gen_reg_rtx (SImode);
10401 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10402 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10403 part_bytes_reg = tmp;
10410 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10411 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
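/* Illustrative expansion for a 10-byte, word-aligned copy (exact
   registers depend on allocation): in_words_to_go = 3,
   out_words_to_go = 2, last_bytes = 2, giving roughly

	ldmia rs, {r0, r1, r2}	@ three words cover all ten bytes
	stmia rd, {r0, r1}	@ the two complete words
	strh  r2, [rd, #8]	@ the two-byte remainder

   with the big-endian path first shifting the wanted bytes down.  */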
10418 /* Select a dominance comparison mode if possible for a test of the general
10419 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10420 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10421 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10422 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10423 In all cases OP will be either EQ or NE, but we don't need to know which
10424 here. If we are unable to support a dominance comparison we return
10425 CC mode. This will then fail to match for the RTL expressions that
10426 generate this call. */
10428 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10430 enum rtx_code cond1, cond2;
10433 /* Currently we will probably get the wrong result if the individual
10434 comparisons are not simple. This also ensures that it is safe to
10435 reverse a comparison if necessary. */
10436 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10438 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10442 /* The if_then_else variant of this tests the second condition if the
10443 first passes, but is true if the first fails. Reverse the first
10444 condition to get a true "inclusive-or" expression. */
10445 if (cond_or == DOM_CC_NX_OR_Y)
10446 cond1 = reverse_condition (cond1);
10448 /* If the comparisons are not equal, and one doesn't dominate the other,
10449 then we can't do this. */
10451 && !comparison_dominates_p (cond1, cond2)
10452 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10457 enum rtx_code temp = cond1;
10465 if (cond_or == DOM_CC_X_AND_Y)
10470 case EQ: return CC_DEQmode;
10471 case LE: return CC_DLEmode;
10472 case LEU: return CC_DLEUmode;
10473 case GE: return CC_DGEmode;
10474 case GEU: return CC_DGEUmode;
10475 default: gcc_unreachable ();
10479 if (cond_or == DOM_CC_X_AND_Y)
10491 gcc_unreachable ();
10495 if (cond_or == DOM_CC_X_AND_Y)
10507 gcc_unreachable ();
10511 if (cond_or == DOM_CC_X_AND_Y)
10512 return CC_DLTUmode;
10517 return CC_DLTUmode;
10519 return CC_DLEUmode;
10523 gcc_unreachable ();
10527 if (cond_or == DOM_CC_X_AND_Y)
10528 return CC_DGTUmode;
10533 return CC_DGTUmode;
10535 return CC_DGEUmode;
10539 gcc_unreachable ();
10542 /* The remaining cases only occur when both comparisons are the same.  */
10545 gcc_assert (cond1 == cond2);
10549 gcc_assert (cond1 == cond2);
10553 gcc_assert (cond1 == cond2);
10557 gcc_assert (cond1 == cond2);
10558 return CC_DLEUmode;
10561 gcc_assert (cond1 == cond2);
10562 return CC_DGEUmode;
10565 gcc_unreachable ();
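/* Concretely, a dominance mode is what lets a condition such as
   "a == 0 && b == 0" be evaluated with a single flag test, e.g.

	cmp   a, #0
	cmpeq b, #0	@ second compare executes only if Z was set
	beq   both_zero

   (illustrative); CC_DEQmode records that only the EQ outcome of the
   combined comparison is meaningful.  */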
10570 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10572 /* All floating point compares return CCFP if it is an equality
10573 comparison, and CCFPE otherwise. */
10574 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10594 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10599 gcc_unreachable ();
10603 /* A compare with a shifted operand. Because of canonicalization, the
10604 comparison will have to be swapped when we emit the assembler. */
10605 if (GET_MODE (y) == SImode
10606 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10607 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10608 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10609 || GET_CODE (x) == ROTATERT))
10612 /* This operation is performed swapped, but since we only rely on the Z
10613 flag we don't need an additional mode. */
10614 if (GET_MODE (y) == SImode
10615 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10616 && GET_CODE (x) == NEG
10617 && (op == EQ || op == NE))
10620 /* This is a special case that is used by combine to allow a
10621 comparison of a shifted byte load to be split into a zero-extend
10622 followed by a comparison of the shifted integer (only valid for
10623 equalities and unsigned inequalities). */
10624 if (GET_MODE (x) == SImode
10625 && GET_CODE (x) == ASHIFT
10626 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10627 && GET_CODE (XEXP (x, 0)) == SUBREG
10628 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10629 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10630 && (op == EQ || op == NE
10631 || op == GEU || op == GTU || op == LTU || op == LEU)
10632 && GET_CODE (y) == CONST_INT)
10635 /* A construct for a conditional compare, if the false arm contains
10636 0, then both conditions must be true, otherwise either condition
10637 must be true. Not all conditions are possible, so CCmode is
10638 returned if it can't be done. */
10639 if (GET_CODE (x) == IF_THEN_ELSE
10640 && (XEXP (x, 2) == const0_rtx
10641 || XEXP (x, 2) == const1_rtx)
10642 && COMPARISON_P (XEXP (x, 0))
10643 && COMPARISON_P (XEXP (x, 1)))
10644 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10645 INTVAL (XEXP (x, 2)));
10647 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10648 if (GET_CODE (x) == AND
10649 && (op == EQ || op == NE)
10650 && COMPARISON_P (XEXP (x, 0))
10651 && COMPARISON_P (XEXP (x, 1)))
10652 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10655 if (GET_CODE (x) == IOR
10656 && (op == EQ || op == NE)
10657 && COMPARISON_P (XEXP (x, 0))
10658 && COMPARISON_P (XEXP (x, 1)))
10659 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10662 /* An operation (on Thumb) where we want to test for a single bit.
10663 This is done by shifting that bit up into the top bit of a
10664 scratch register; we can then branch on the sign bit. */
10666 && GET_MODE (x) == SImode
10667 && (op == EQ || op == NE)
10668 && GET_CODE (x) == ZERO_EXTRACT
10669 && XEXP (x, 1) == const1_rtx)
10672 /* An operation that sets the condition codes as a side-effect, the
10673 V flag is not set correctly, so we can only use comparisons where
10674 this doesn't matter.  (For LT and GE we can use "mi" and "pl" instead.)  */
10676 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10677 if (GET_MODE (x) == SImode
10679 && (op == EQ || op == NE || op == LT || op == GE)
10680 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10681 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10682 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10683 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10684 || GET_CODE (x) == LSHIFTRT
10685 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10686 || GET_CODE (x) == ROTATERT
10687 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10688 return CC_NOOVmode;
10690 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10693 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10694 && GET_CODE (x) == PLUS
10695 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10698 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10700 /* To keep things simple, always use the Cirrus cfcmp64 if it is available.  */
10702 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10709 /* A DImode comparison against zero can be implemented by
10710 or'ing the two halves together. */
10711 if (y == const0_rtx)
10714 /* We can do an equality test in three Thumb instructions. */
10724 /* DImode unsigned comparisons can be implemented by cmp +
10725 cmpeq without a scratch register.  Not worth doing in Thumb-2.  */
10736 /* DImode signed and unsigned comparisons can be implemented
10737 by cmp + sbcs with a scratch register, but that does not
10738 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10739 gcc_assert (op != EQ && op != NE);
10743 gcc_unreachable ();
10750 /* X and Y are two things to compare using CODE. Emit the compare insn and
10751 return the rtx for register 0 in the proper mode. FP means this is a
10752 floating point compare; it is probably not needed on the ARM.  */
10754 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10756 enum machine_mode mode;
10758 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10760 /* We might have X as a constant, Y as a register because of the predicates
10761 used for cmpdi. If so, force X to a register here. */
10762 if (dimode_comparison && !REG_P (x))
10763 x = force_reg (DImode, x);
10765 mode = SELECT_CC_MODE (code, x, y);
10766 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10768 if (dimode_comparison
10769 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10770 && mode != CC_CZmode)
10774 /* To compare two non-zero values for equality, XOR them and
10775 then compare against zero. Not used for ARM mode; there
10776 CC_CZmode is cheaper. */
10777 if (mode == CC_Zmode && y != const0_rtx)
10779 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10782 /* A scratch register is required. */
10783 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10784 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10785 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10788 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
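/* For the DImode equality-against-nonzero case above, the emitted RTL
   corresponds roughly to (illustrative):

	eor   xl, xl, yl
	eor   xh, xh, yh
	orrs  scratch, xl, xh	@ Z set iff all 64 bits matched

   so only a compare of the XORed value against zero, plus the scratch
   clobber, is needed.  */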
10793 /* Generate a sequence of insns that will generate the correct return
10794 address mask depending on the physical architecture that the program is running on.  */
10797 arm_gen_return_addr_mask (void)
10799 rtx reg = gen_reg_rtx (Pmode);
10801 emit_insn (gen_return_addr_mask (reg));
10806 arm_reload_in_hi (rtx *operands)
10808 rtx ref = operands[1];
10810 HOST_WIDE_INT offset = 0;
10812 if (GET_CODE (ref) == SUBREG)
10814 offset = SUBREG_BYTE (ref);
10815 ref = SUBREG_REG (ref);
10818 if (GET_CODE (ref) == REG)
10820 /* We have a pseudo which has been spilt onto the stack; there
10821 are two cases here: the first where there is a simple
10822 stack-slot replacement and a second where the stack-slot is
10823 out of range, or is used as a subreg. */
10824 if (reg_equiv_mem[REGNO (ref)])
10826 ref = reg_equiv_mem[REGNO (ref)];
10827 base = find_replacement (&XEXP (ref, 0));
10830 /* The slot is out of range, or was dressed up in a SUBREG. */
10831 base = reg_equiv_address[REGNO (ref)];
10834 base = find_replacement (&XEXP (ref, 0));
10836 /* Handle the case where the address is too complex to be offset by 1. */
10837 if (GET_CODE (base) == MINUS
10838 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10840 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10842 emit_set_insn (base_plus, base);
10845 else if (GET_CODE (base) == PLUS)
10847 /* The addend must be CONST_INT, or we would have dealt with it above. */
10848 HOST_WIDE_INT hi, lo;
10850 offset += INTVAL (XEXP (base, 1));
10851 base = XEXP (base, 0);
10853 /* Rework the address into a legal sequence of insns. */
10854 /* Valid range for lo is -4095 -> 4095 */
10857 : -((-offset) & 0xfff));
10859 /* Corner case, if lo is the max offset then we would be out of range
10860 once we have added the additional 1 below, so bump the msb into the
10861 pre-loading insn(s). */
10865 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10866 ^ (HOST_WIDE_INT) 0x80000000)
10867 - (HOST_WIDE_INT) 0x80000000);
10869 gcc_assert (hi + lo == offset);
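/* E.g. OFFSET = 0x1234 splits into LO = 0x234 and HI = 0x1000: HI is
   added to the base first (addsi3 may use several insns), leaving LO
   within the +/-4095 reach of the byte loads below.  */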
10873 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10875 /* Get the base address; addsi3 knows how to handle constants
10876 that require more than one insn. */
10877 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10883 /* Operands[2] may overlap operands[0] (though it won't overlap
10884 operands[1]), that's why we asked for a DImode reg -- so we can
10885 use the bit that does not overlap. */
10886 if (REGNO (operands[2]) == REGNO (operands[0]))
10887 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10889 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10891 emit_insn (gen_zero_extendqisi2 (scratch,
10892 gen_rtx_MEM (QImode,
10893 plus_constant (base,
10895 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10896 gen_rtx_MEM (QImode,
10897 plus_constant (base,
10899 if (!BYTES_BIG_ENDIAN)
10900 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10901 gen_rtx_IOR (SImode,
10904 gen_rtx_SUBREG (SImode, operands[0], 0),
10908 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10909 gen_rtx_IOR (SImode,
10910 gen_rtx_ASHIFT (SImode, scratch,
10912 gen_rtx_SUBREG (SImode, operands[0], 0)));
10915 /* Handle storing a half-word to memory during reload by synthesizing as two
10916 byte stores. Take care not to clobber the input values until after we
10917 have moved them somewhere safe. This code assumes that if the DImode
10918 scratch in operands[2] overlaps either the input value or output address
10919 in some way, then that value must die in this insn (we absolutely need
10920 two scratch registers for some corner cases). */
10922 arm_reload_out_hi (rtx *operands)
10924 rtx ref = operands[0];
10925 rtx outval = operands[1];
10927 HOST_WIDE_INT offset = 0;
10929 if (GET_CODE (ref) == SUBREG)
10931 offset = SUBREG_BYTE (ref);
10932 ref = SUBREG_REG (ref);
10935 if (GET_CODE (ref) == REG)
10937 /* We have a pseudo which has been spilt onto the stack; there
10938 are two cases here: the first where there is a simple
10939 stack-slot replacement and a second where the stack-slot is
10940 out of range, or is used as a subreg. */
10941 if (reg_equiv_mem[REGNO (ref)])
10943 ref = reg_equiv_mem[REGNO (ref)];
10944 base = find_replacement (&XEXP (ref, 0));
10947 /* The slot is out of range, or was dressed up in a SUBREG. */
10948 base = reg_equiv_address[REGNO (ref)];
10951 base = find_replacement (&XEXP (ref, 0));
10953 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10955 /* Handle the case where the address is too complex to be offset by 1. */
10956 if (GET_CODE (base) == MINUS
10957 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10959 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10961 /* Be careful not to destroy OUTVAL. */
10962 if (reg_overlap_mentioned_p (base_plus, outval))
10964 /* Updating base_plus might destroy outval; see if we can
10965 swap the scratch and base_plus. */
10966 if (!reg_overlap_mentioned_p (scratch, outval))
10969 scratch = base_plus;
10974 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10976 /* Be conservative and copy OUTVAL into the scratch now;
10977 this should only be necessary if outval is a subreg
10978 of something larger than a word. */
10979 /* XXX Might this clobber base? I can't see how it can,
10980 since scratch is known to overlap with OUTVAL, and
10981 must be wider than a word. */
10982 emit_insn (gen_movhi (scratch_hi, outval));
10983 outval = scratch_hi;
10987 emit_set_insn (base_plus, base);
10990 else if (GET_CODE (base) == PLUS)
10992 /* The addend must be CONST_INT, or we would have dealt with it above. */
10993 HOST_WIDE_INT hi, lo;
10995 offset += INTVAL (XEXP (base, 1));
10996 base = XEXP (base, 0);
10998 /* Rework the address into a legal sequence of insns. */
10999 /* Valid range for lo is -4095 -> 4095 */
11002 : -((-offset) & 0xfff));
11004 /* Corner case, if lo is the max offset then we would be out of range
11005 once we have added the additional 1 below, so bump the msb into the
11006 pre-loading insn(s). */
11010 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11011 ^ (HOST_WIDE_INT) 0x80000000)
11012 - (HOST_WIDE_INT) 0x80000000);
11014 gcc_assert (hi + lo == offset);
11018 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11020 /* Be careful not to destroy OUTVAL. */
11021 if (reg_overlap_mentioned_p (base_plus, outval))
11023 /* Updating base_plus might destroy outval; see if we
11024 can swap the scratch and base_plus. */
11025 if (!reg_overlap_mentioned_p (scratch, outval))
11028 scratch = base_plus;
11033 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11035 /* Be conservative and copy outval into scratch now;
11036 this should only be necessary if outval is a
11037 subreg of something larger than a word. */
11038 /* XXX Might this clobber base? I can't see how it
11039 can, since scratch is known to overlap with
11041 emit_insn (gen_movhi (scratch_hi, outval));
11042 outval = scratch_hi;
11046 /* Get the base address; addsi3 knows how to handle constants
11047 that require more than one insn. */
11048 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11054 if (BYTES_BIG_ENDIAN)
11056 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11057 plus_constant (base, offset + 1)),
11058 gen_lowpart (QImode, outval)));
11059 emit_insn (gen_lshrsi3 (scratch,
11060 gen_rtx_SUBREG (SImode, outval, 0),
11062 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11063 gen_lowpart (QImode, scratch)));
11067 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11068 gen_lowpart (QImode, outval)));
11069 emit_insn (gen_lshrsi3 (scratch,
11070 gen_rtx_SUBREG (SImode, outval, 0),
11072 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11073 plus_constant (base, offset + 1)),
11074 gen_lowpart (QImode, scratch)));
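/* Net effect of the little-endian branch above (illustrative):

	strb  outval, [base, #offset]		@ low byte
	lsr   scratch, outval, #8
	strb  scratch, [base, #offset + 1]	@ high byte

   The scratch register guarantees OUTVAL itself is never clobbered
   before both bytes reach memory.  */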
11078 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11079 (padded to the size of a word) should be passed in a register. */
11082 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11084 if (TARGET_AAPCS_BASED)
11085 return must_pass_in_stack_var_size (mode, type);
11087 return must_pass_in_stack_var_size_or_pad (mode, type);
11091 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11092 Return true if an argument passed on the stack should be padded upwards,
11093 i.e. if the least-significant byte has useful data.
11094 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11095 aggregate types are placed in the lowest memory address. */
11098 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11100 if (!TARGET_AAPCS_BASED)
11101 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11103 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11110 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11111 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11112 byte of the register has useful data, and return the opposite if the
11113 most significant byte does.
11114 For AAPCS, small aggregates and small complex types are always padded
11115 upwards. */
11118 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11119 tree type, int first ATTRIBUTE_UNUSED)
11121 if (TARGET_AAPCS_BASED
11122 && BYTES_BIG_ENDIAN
11123 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11124 && int_size_in_bytes (type) <= 4)
11127 /* Otherwise, use default padding. */
11128 return !BYTES_BIG_ENDIAN;
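/* Example (illustrative): on a big-endian AAPCS target a 3-byte
   aggregate held in a core register is padded upward, i.e. the padding
   follows the data, so the value keeps the same byte layout it would
   have in memory; all other cases fall back to !BYTES_BIG_ENDIAN. */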
11132 /* Print a symbolic form of X to the debug file, F. */
11134 arm_print_value (FILE *f, rtx x)
11136 switch (GET_CODE (x))
11139 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11143 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11151 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11153 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11154 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11162 fprintf (f, "\"%s\"", XSTR (x, 0));
11166 fprintf (f, "`%s'", XSTR (x, 0));
11170 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11174 arm_print_value (f, XEXP (x, 0));
11178 arm_print_value (f, XEXP (x, 0));
11180 arm_print_value (f, XEXP (x, 1));
11188 fprintf (f, "????");
11193 /* Routines for manipulation of the constant pool. */
11195 /* Arm instructions cannot load a large constant directly into a
11196 register; they have to come from a pc relative load. The constant
11197 must therefore be placed in the addressable range of the pc
11198 relative load. Depending on the precise pc relative load
11199 instruction the range is somewhere between 256 bytes and 4k. This
11200 means that we often have to dump a constant inside a function, and
11201 generate code to branch around it.
11203 It is important to minimize this, since the branches will slow
11204 things down and make the code larger.
11206 Normally we can hide the table after an existing unconditional
11207 branch so that there is no interruption of the flow, but in the
11208 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
11226 We fix this by performing a scan after scheduling, which notices
11227 which instructions need to have their operands fetched from the
11228 constant table and builds the table.
11230 The algorithm starts by building a table of all the constants that
11231 need fixing up and all the natural barriers in the function (places
11232 where a constant table can be dropped without breaking the flow).
11233 For each fixup we note how far the pc-relative replacement will be
11234 able to reach and the offset of the instruction into the function.
11236 Having built the table we then group the fixes together to form
11237 tables that are as large as possible (subject to addressing
11238 constraints) and emit each table of constants after the last
11239 barrier that is within range of all the instructions in the group.
11240 If a group does not contain a barrier, then we forcibly create one
11241 by inserting a jump instruction into the flow. Once the table has
11242 been inserted, the insns are then modified to reference the
11243 relevant entry in the pool.
11245 Possible enhancements to the algorithm (not implemented) are:
11247 1) For some processors and object formats, there may be benefit in
11248 aligning the pools to the start of cache lines; this alignment
11249 would need to be taken into account when calculating addressability
11250 of a label. */
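/* A sketch of the resulting transformation (illustrative, not from the
   original source): a fixed-up load

	ldr	rn, .LCP0	@ was a move of a large constant

   ends up referencing a pool entry that is emitted after a barrier:

	b	.L1		@ barrier created if none was in range
	.align	2
   .LCP0:
	.long	0x12345678	@ minipool entry at its assigned offset
   .L1:
	... */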
11252 /* These typedefs are located at the start of this file, so that
11253 they can be used in the prototypes there. This comment is to
11254 remind readers of that fact so that the following structures
11255 can be understood more easily.
11257 typedef struct minipool_node Mnode;
11258 typedef struct minipool_fixup Mfix; */
11260 struct minipool_node
11262 /* Doubly linked chain of entries. */
11265 /* The maximum offset into the code that this entry can be placed. While
11266 pushing fixes for forward references, all entries are sorted in order
11267 of increasing max_address. */
11268 HOST_WIDE_INT max_address;
11269 /* Similarly for an entry inserted for a backwards ref. */
11270 HOST_WIDE_INT min_address;
11271 /* The number of fixes referencing this entry. This can become zero
11272 if we "unpush" an entry. In this case we ignore the entry when we
11273 come to emit the code. */
11275 /* The offset from the start of the minipool. */
11276 HOST_WIDE_INT offset;
11277 /* The value in the table. */
11279 /* The mode of value. */
11280 enum machine_mode mode;
11281 /* The size of the value. With iWMMXt enabled
11282 sizes > 4 also imply an alignment of 8 bytes. */
11286 struct minipool_fixup
11290 HOST_WIDE_INT address;
11292 enum machine_mode mode;
11296 HOST_WIDE_INT forwards;
11297 HOST_WIDE_INT backwards;
11300 /* Fixes less than a word need padding out to a word boundary. */
11301 #define MINIPOOL_FIX_SIZE(mode) \
11302 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
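/* For instance, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) is 8; the values
   follow directly from GET_MODE_SIZE (noted here for illustration). */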
11304 static Mnode * minipool_vector_head;
11305 static Mnode * minipool_vector_tail;
11306 static rtx minipool_vector_label;
11307 static int minipool_pad;
11309 /* The linked list of all minipool fixes required for this function. */
11310 Mfix * minipool_fix_head;
11311 Mfix * minipool_fix_tail;
11312 /* The fix entry for the current minipool, once it has been placed. */
11313 Mfix * minipool_barrier;
11315 /* Determines if INSN is the start of a jump table. Returns the end
11316 of the TABLE or NULL_RTX. */
11318 is_jump_table (rtx insn)
11322 if (GET_CODE (insn) == JUMP_INSN
11323 && JUMP_LABEL (insn) != NULL
11324 && ((table = next_real_insn (JUMP_LABEL (insn)))
11325 == next_real_insn (insn))
11327 && GET_CODE (table) == JUMP_INSN
11328 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11329 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11335 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11336 #define JUMP_TABLES_IN_TEXT_SECTION 0
11339 static HOST_WIDE_INT
11340 get_jump_table_size (rtx insn)
11342 /* ADDR_VECs only take room if read-only data goes into the text
11343 section. */
11344 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11346 rtx body = PATTERN (insn);
11347 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11348 HOST_WIDE_INT size;
11349 HOST_WIDE_INT modesize;
11351 modesize = GET_MODE_SIZE (GET_MODE (body));
11352 size = modesize * XVECLEN (body, elt);
11356 /* Round up size of TBB table to a halfword boundary. */
11357 size = (size + 1) & ~(HOST_WIDE_INT)1;
11360 /* No padding necessary for TBH. */
11363 /* Add two bytes for alignment on Thumb. */
11368 gcc_unreachable ();
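/* Example (illustrative): a Thumb-2 TBB table (QImode ADDR_DIFF_VEC)
   with 5 entries occupies 5 bytes and is rounded up to 6; the same
   table in HImode (TBH) is exactly 10 bytes and needs no padding. */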
11376 /* Move a minipool fix MP from its current location to before MAX_MP.
11377 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11378 constraints may need updating. */
11380 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11381 HOST_WIDE_INT max_address)
11383 /* The code below assumes these are different. */
11384 gcc_assert (mp != max_mp);
11386 if (max_mp == NULL)
11388 if (max_address < mp->max_address)
11389 mp->max_address = max_address;
11393 if (max_address > max_mp->max_address - mp->fix_size)
11394 mp->max_address = max_mp->max_address - mp->fix_size;
11396 mp->max_address = max_address;
11398 /* Unlink MP from its current position. Since max_mp is non-null,
11399 mp->prev must be non-null. */
11400 mp->prev->next = mp->next;
11401 if (mp->next != NULL)
11402 mp->next->prev = mp->prev;
11404 minipool_vector_tail = mp->prev;
11406 /* Re-insert it before MAX_MP. */
11408 mp->prev = max_mp->prev;
11411 if (mp->prev != NULL)
11412 mp->prev->next = mp;
11414 minipool_vector_head = mp;
11417 /* Save the new entry. */
11420 /* Scan over the preceding entries and adjust their addresses as
11422 while (mp->prev != NULL
11423 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11425 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11432 /* Add a constant to the minipool for a forward reference. Returns the
11433 node added or NULL if the constant will not fit in this pool. */
11435 add_minipool_forward_ref (Mfix *fix)
11437 /* If set, max_mp is the first pool_entry that has a lower
11438 constraint than the one we are trying to add. */
11439 Mnode * max_mp = NULL;
11440 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11443 /* If the minipool starts before the end of FIX->INSN then this FIX
11444 cannot be placed into the current pool. Furthermore, adding the
11445 new constant pool entry may cause the pool to start FIX_SIZE bytes
11446 earlier. */
11447 if (minipool_vector_head &&
11448 (fix->address + get_attr_length (fix->insn)
11449 >= minipool_vector_head->max_address - fix->fix_size))
11452 /* Scan the pool to see if a constant with the same value has
11453 already been added. While we are doing this, also note the
11454 location where we must insert the constant if it doesn't already
11455 exist. */
11456 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11458 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11459 && fix->mode == mp->mode
11460 && (GET_CODE (fix->value) != CODE_LABEL
11461 || (CODE_LABEL_NUMBER (fix->value)
11462 == CODE_LABEL_NUMBER (mp->value)))
11463 && rtx_equal_p (fix->value, mp->value))
11465 /* More than one fix references this entry. */
11467 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11470 /* Note the insertion point if necessary. */
11472 && mp->max_address > max_address)
11475 /* If we are inserting an 8-byte aligned quantity and
11476 we have not already found an insertion point, then
11477 make sure that all such 8-byte aligned quantities are
11478 placed at the start of the pool. */
11479 if (ARM_DOUBLEWORD_ALIGN
11481 && fix->fix_size >= 8
11482 && mp->fix_size < 8)
11485 max_address = mp->max_address;
11489 /* The value is not currently in the minipool, so we need to create
11490 a new entry for it. If MAX_MP is NULL, the entry will be put on
11491 the end of the list since the placement is less constrained than
11492 any existing entry. Otherwise, we insert the new fix before
11493 MAX_MP and, if necessary, adjust the constraints on the other
11496 mp->fix_size = fix->fix_size;
11497 mp->mode = fix->mode;
11498 mp->value = fix->value;
11500 /* Not yet required for a backwards ref. */
11501 mp->min_address = -65536;
11503 if (max_mp == NULL)
11505 mp->max_address = max_address;
11507 mp->prev = minipool_vector_tail;
11509 if (mp->prev == NULL)
11511 minipool_vector_head = mp;
11512 minipool_vector_label = gen_label_rtx ();
11515 mp->prev->next = mp;
11517 minipool_vector_tail = mp;
11521 if (max_address > max_mp->max_address - mp->fix_size)
11522 mp->max_address = max_mp->max_address - mp->fix_size;
11524 mp->max_address = max_address;
11527 mp->prev = max_mp->prev;
11529 if (mp->prev != NULL)
11530 mp->prev->next = mp;
11532 minipool_vector_head = mp;
11535 /* Save the new entry. */
11538 /* Scan over the preceding entries and adjust their addresses as
11540 while (mp->prev != NULL
11541 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11543 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11551 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11552 HOST_WIDE_INT min_address)
11554 HOST_WIDE_INT offset;
11556 /* The code below assumes these are different. */
11557 gcc_assert (mp != min_mp);
11559 if (min_mp == NULL)
11561 if (min_address > mp->min_address)
11562 mp->min_address = min_address;
11566 /* We will adjust this below if it is too loose. */
11567 mp->min_address = min_address;
11569 /* Unlink MP from its current position. Since min_mp is non-null,
11570 mp->next must be non-null. */
11571 mp->next->prev = mp->prev;
11572 if (mp->prev != NULL)
11573 mp->prev->next = mp->next;
11575 minipool_vector_head = mp->next;
11577 /* Reinsert it after MIN_MP. */
11579 mp->next = min_mp->next;
11581 if (mp->next != NULL)
11582 mp->next->prev = mp;
11584 minipool_vector_tail = mp;
11590 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11592 mp->offset = offset;
11593 if (mp->refcount > 0)
11594 offset += mp->fix_size;
11596 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11597 mp->next->min_address = mp->min_address + mp->fix_size;
11603 /* Add a constant to the minipool for a backward reference. Returns the
11604 node added or NULL if the constant will not fit in this pool.
11606 Note that the code for insertion for a backwards reference can be
11607 somewhat confusing because the calculated offsets for each fix do
11608 not take into account the size of the pool (which is still under
11609 construction.) */
11611 add_minipool_backward_ref (Mfix *fix)
11613 /* If set, min_mp is the last pool_entry that has a lower constraint
11614 than the one we are trying to add. */
11615 Mnode *min_mp = NULL;
11616 /* This can be negative, since it is only a constraint. */
11617 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11620 /* If we can't reach the current pool from this insn, or if we can't
11621 insert this entry at the end of the pool without pushing other
11622 fixes out of range, then we don't try. This ensures that we
11623 can't fail later on. */
11624 if (min_address >= minipool_barrier->address
11625 || (minipool_vector_tail->min_address + fix->fix_size
11626 >= minipool_barrier->address))
11629 /* Scan the pool to see if a constant with the same value has
11630 already been added. While we are doing this, also note the
11631 location where we must insert the constant if it doesn't already
11632 exist. */
11633 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11635 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11636 && fix->mode == mp->mode
11637 && (GET_CODE (fix->value) != CODE_LABEL
11638 || (CODE_LABEL_NUMBER (fix->value)
11639 == CODE_LABEL_NUMBER (mp->value)))
11640 && rtx_equal_p (fix->value, mp->value)
11641 /* Check that there is enough slack to move this entry to the
11642 end of the table (this is conservative). */
11643 && (mp->max_address
11644 > (minipool_barrier->address
11645 + minipool_vector_tail->offset
11646 + minipool_vector_tail->fix_size)))
11649 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11652 if (min_mp != NULL)
11653 mp->min_address += fix->fix_size;
11656 /* Note the insertion point if necessary. */
11657 if (mp->min_address < min_address)
11659 /* For now, we do not allow the insertion of nodes requiring
11660 8-byte alignment anywhere but at the start of the pool. */
11661 if (ARM_DOUBLEWORD_ALIGN
11662 && fix->fix_size >= 8 && mp->fix_size < 8)
11667 else if (mp->max_address
11668 < minipool_barrier->address + mp->offset + fix->fix_size)
11670 /* Inserting before this entry would push the fix beyond
11671 its maximum address (which can happen if we have
11672 re-located a forwards fix); force the new fix to come
11673 after it. */
11674 if (ARM_DOUBLEWORD_ALIGN
11675 && fix->fix_size >= 8 && mp->fix_size < 8)
11680 min_address = mp->min_address + fix->fix_size;
11683 /* Do not insert a non-8-byte aligned quantity before 8-byte
11684 aligned quantities. */
11685 else if (ARM_DOUBLEWORD_ALIGN
11686 && fix->fix_size < 8
11687 && mp->fix_size >= 8)
11690 min_address = mp->min_address + fix->fix_size;
11695 /* We need to create a new entry. */
11697 mp->fix_size = fix->fix_size;
11698 mp->mode = fix->mode;
11699 mp->value = fix->value;
11701 mp->max_address = minipool_barrier->address + 65536;
11703 mp->min_address = min_address;
11705 if (min_mp == NULL)
11708 mp->next = minipool_vector_head;
11710 if (mp->next == NULL)
11712 minipool_vector_tail = mp;
11713 minipool_vector_label = gen_label_rtx ();
11716 mp->next->prev = mp;
11718 minipool_vector_head = mp;
11722 mp->next = min_mp->next;
11726 if (mp->next != NULL)
11727 mp->next->prev = mp;
11729 minipool_vector_tail = mp;
11732 /* Save the new entry. */
11740 /* Scan over the following entries and adjust their offsets. */
11741 while (mp->next != NULL)
11743 if (mp->next->min_address < mp->min_address + mp->fix_size)
11744 mp->next->min_address = mp->min_address + mp->fix_size;
11747 mp->next->offset = mp->offset + mp->fix_size;
11749 mp->next->offset = mp->offset;
11758 assign_minipool_offsets (Mfix *barrier)
11760 HOST_WIDE_INT offset = 0;
11763 minipool_barrier = barrier;
11765 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11767 mp->offset = offset;
11769 if (mp->refcount > 0)
11770 offset += mp->fix_size;
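/* Example (illustrative): three live entries with fix_size 4, 4 and 8
   receive offsets 0, 4 and 8; an entry whose refcount has dropped to
   zero is assigned the running offset but contributes no size. */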
11774 /* Output the literal table. */
11776 dump_minipool (rtx scan)
11782 if (ARM_DOUBLEWORD_ALIGN)
11783 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11784 if (mp->refcount > 0 && mp->fix_size >= 8)
11791 fprintf (dump_file,
11792 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11793 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11795 scan = emit_label_after (gen_label_rtx (), scan);
11796 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11797 scan = emit_label_after (minipool_vector_label, scan);
11799 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11801 if (mp->refcount > 0)
11805 fprintf (dump_file,
11806 ";; Offset %u, min %ld, max %ld ",
11807 (unsigned) mp->offset, (unsigned long) mp->min_address,
11808 (unsigned long) mp->max_address);
11809 arm_print_value (dump_file, mp->value);
11810 fputc ('\n', dump_file);
11813 switch (mp->fix_size)
11815 #ifdef HAVE_consttable_1
11817 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11821 #ifdef HAVE_consttable_2
11823 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11827 #ifdef HAVE_consttable_4
11829 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11833 #ifdef HAVE_consttable_8
11835 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11839 #ifdef HAVE_consttable_16
11841 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11846 gcc_unreachable ();
11854 minipool_vector_head = minipool_vector_tail = NULL;
11855 scan = emit_insn_after (gen_consttable_end (), scan);
11856 scan = emit_barrier_after (scan);
11859 /* Return the cost of forcibly inserting a barrier after INSN. */
11861 arm_barrier_cost (rtx insn)
11863 /* Basing the location of the pool on the loop depth is preferable,
11864 but at the moment, the basic block information seems to be
11865 corrupted by this stage of the compilation. */
11866 int base_cost = 50;
11867 rtx next = next_nonnote_insn (insn);
11869 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11872 switch (GET_CODE (insn))
11875 /* It will always be better to place the table before the label, rather
11876 than after it. */
11884 return base_cost - 10;
11887 return base_cost + 10;
11891 /* Find the best place in the insn stream in the range
11892 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11893 Create the barrier by inserting a jump and add a new fix entry for
11896 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11898 HOST_WIDE_INT count = 0;
11900 rtx from = fix->insn;
11901 /* The instruction after which we will insert the jump. */
11902 rtx selected = NULL;
11904 /* The address at which the jump instruction will be placed. */
11905 HOST_WIDE_INT selected_address;
11907 HOST_WIDE_INT max_count = max_address - fix->address;
11908 rtx label = gen_label_rtx ();
11910 selected_cost = arm_barrier_cost (from);
11911 selected_address = fix->address;
11913 while (from && count < max_count)
11918 /* This code shouldn't have been called if there was a natural barrier
11919 within range. */
11920 gcc_assert (GET_CODE (from) != BARRIER);
11922 /* Count the length of this insn. */
11923 count += get_attr_length (from);
11925 /* If there is a jump table, add its length. */
11926 tmp = is_jump_table (from);
11929 count += get_jump_table_size (tmp);
11931 /* Jump tables aren't in a basic block, so base the cost on
11932 the dispatch insn. If we select this location, we will
11933 still put the pool after the table. */
11934 new_cost = arm_barrier_cost (from);
11936 if (count < max_count
11937 && (!selected || new_cost <= selected_cost))
11940 selected_cost = new_cost;
11941 selected_address = fix->address + count;
11944 /* Continue after the dispatch table. */
11945 from = NEXT_INSN (tmp);
11949 new_cost = arm_barrier_cost (from);
11951 if (count < max_count
11952 && (!selected || new_cost <= selected_cost))
11955 selected_cost = new_cost;
11956 selected_address = fix->address + count;
11959 from = NEXT_INSN (from);
11962 /* Make sure that we found a place to insert the jump. */
11963 gcc_assert (selected);
11965 /* Create a new JUMP_INSN that branches around a barrier. */
11966 from = emit_jump_insn_after (gen_jump (label), selected);
11967 JUMP_LABEL (from) = label;
11968 barrier = emit_barrier_after (from);
11969 emit_label_after (label, barrier);
11971 /* Create a minipool barrier entry for the new barrier. */
11972 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11973 new_fix->insn = barrier;
11974 new_fix->address = selected_address;
11975 new_fix->next = fix->next;
11976 fix->next = new_fix;
11981 /* Record that there is a natural barrier in the insn stream at
11984 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11986 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11989 fix->address = address;
11992 if (minipool_fix_head != NULL)
11993 minipool_fix_tail->next = fix;
11995 minipool_fix_head = fix;
11997 minipool_fix_tail = fix;
12000 /* Record INSN, which will need fixing up to load a value from the
12001 minipool. ADDRESS is the offset of the insn since the start of the
12002 function; LOC is a pointer to the part of the insn which requires
12003 fixing; VALUE is the constant that must be loaded, which is of type
12006 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12007 enum machine_mode mode, rtx value)
12009 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12012 fix->address = address;
12015 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12016 fix->value = value;
12017 fix->forwards = get_attr_pool_range (insn);
12018 fix->backwards = get_attr_neg_pool_range (insn);
12019 fix->minipool = NULL;
12021 /* If an insn doesn't have a range defined for it, then it isn't
12022 expecting to be reworked by this code. Better to stop now than
12023 to generate duff assembly code. */
12024 gcc_assert (fix->forwards || fix->backwards);
12026 /* If an entry requires 8-byte alignment then assume all constant pools
12027 require 4 bytes of padding. Trying to do this later on a per-pool
12028 basis is awkward because existing pool entries have to be modified. */
12029 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12034 fprintf (dump_file,
12035 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12036 GET_MODE_NAME (mode),
12037 INSN_UID (insn), (unsigned long) address,
12038 -1 * (long)fix->backwards, (long)fix->forwards);
12039 arm_print_value (dump_file, fix->value);
12040 fprintf (dump_file, "\n");
12043 /* Add it to the chain of fixes. */
12046 if (minipool_fix_head != NULL)
12047 minipool_fix_tail->next = fix;
12049 minipool_fix_head = fix;
12051 minipool_fix_tail = fix;
12054 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12055 Returns the number of insns needed, or 99 if we don't know how to
12058 arm_const_double_inline_cost (rtx val)
12060 rtx lowpart, highpart;
12061 enum machine_mode mode;
12063 mode = GET_MODE (val);
12065 if (mode == VOIDmode)
12068 gcc_assert (GET_MODE_SIZE (mode) == 8);
12070 lowpart = gen_lowpart (SImode, val);
12071 highpart = gen_highpart_mode (SImode, mode, val);
12073 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12074 gcc_assert (GET_CODE (highpart) == CONST_INT);
12076 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12077 NULL_RTX, NULL_RTX, 0, 0)
12078 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12079 NULL_RTX, NULL_RTX, 0, 0));
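/* Example (illustrative): for the DImode value 0x0000000100000001 each
   32-bit half is 1, loadable with a single mov, so the returned cost
   is 1 + 1 = 2. */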
12082 /* Return true if it is worthwhile to split a 64-bit constant into two
12083 32-bit operations. This is the case if optimizing for size, or
12084 if we have load delay slots, or if one 32-bit part can be done with
12085 a single data operation. */
12087 arm_const_double_by_parts (rtx val)
12089 enum machine_mode mode = GET_MODE (val);
12092 if (optimize_size || arm_ld_sched)
12095 if (mode == VOIDmode)
12098 part = gen_highpart_mode (SImode, mode, val);
12100 gcc_assert (GET_CODE (part) == CONST_INT);
12102 if (const_ok_for_arm (INTVAL (part))
12103 || const_ok_for_arm (~INTVAL (part)))
12106 part = gen_lowpart (SImode, val);
12108 gcc_assert (GET_CODE (part) == CONST_INT);
12110 if (const_ok_for_arm (INTVAL (part))
12111 || const_ok_for_arm (~INTVAL (part)))
12117 /* Return true if it is possible to inline both the high and low parts
12118 of a 64-bit constant into 32-bit data processing instructions. */
12120 arm_const_double_by_immediates (rtx val)
12122 enum machine_mode mode = GET_MODE (val);
12125 if (mode == VOIDmode)
12128 part = gen_highpart_mode (SImode, mode, val);
12130 gcc_assert (GET_CODE (part) == CONST_INT);
12132 if (!const_ok_for_arm (INTVAL (part)))
12135 part = gen_lowpart (SImode, val);
12137 gcc_assert (GET_CODE (part) == CONST_INT);
12139 if (!const_ok_for_arm (INTVAL (part)))
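/* Example (illustrative): 0xff000000000000ff passes this test, since
   both halves (0xff000000 and 0x000000ff) are valid ARM immediates,
   i.e. 8-bit values rotated right by an even amount. */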
12145 /* Scan INSN and note any of its operands that need fixing.
12146 If DO_PUSHES is false we do not actually push any of the fixups
12147 needed. The function returns TRUE if any fixups were needed/pushed.
12148 This is used by arm_memory_load_p() which needs to know about loads
12149 of constants that will be converted into minipool loads. */
12151 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12153 bool result = false;
12156 extract_insn (insn);
12158 if (!constrain_operands (1))
12159 fatal_insn_not_found (insn);
12161 if (recog_data.n_alternatives == 0)
12164 /* Fill in recog_op_alt with information about the constraints of
12165 this insn. */
12166 preprocess_constraints ();
12168 for (opno = 0; opno < recog_data.n_operands; opno++)
12170 /* Things we need to fix can only occur in inputs. */
12171 if (recog_data.operand_type[opno] != OP_IN)
12174 /* If this alternative is a memory reference, then any mention
12175 of constants in this alternative is really to fool reload
12176 into allowing us to accept one there. We need to fix them up
12177 now so that we output the right code. */
12178 if (recog_op_alt[opno][which_alternative].memory_ok)
12180 rtx op = recog_data.operand[opno];
12182 if (CONSTANT_P (op))
12185 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12186 recog_data.operand_mode[opno], op);
12189 else if (GET_CODE (op) == MEM
12190 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12191 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12195 rtx cop = avoid_constant_pool_reference (op);
12197 /* Casting the address of something to a mode narrower
12198 than a word can cause avoid_constant_pool_reference()
12199 to return the pool reference itself. That's no good to
12200 us here. Let's just hope that we can use the
12201 constant pool value directly. */
12203 cop = get_pool_constant (XEXP (op, 0));
12205 push_minipool_fix (insn, address,
12206 recog_data.operand_loc[opno],
12207 recog_data.operand_mode[opno], cop);
12218 /* Convert instructions to their cc-clobbering variant if possible, since
12219 that allows us to use smaller encodings. */
12222 thumb2_reorg (void)
12227 INIT_REG_SET (&live);
12229 /* We are freeing block_for_insn in the toplev to keep compatibility
12230 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12231 compute_bb_for_insn ();
12238 COPY_REG_SET (&live, DF_LR_OUT (bb));
12239 df_simulate_initialize_backwards (bb, &live);
12240 FOR_BB_INSNS_REVERSE (bb, insn)
12242 if (NONJUMP_INSN_P (insn)
12243 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12245 rtx pat = PATTERN (insn);
12246 if (GET_CODE (pat) == SET
12247 && low_register_operand (XEXP (pat, 0), SImode)
12248 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12249 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12250 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12252 rtx dst = XEXP (pat, 0);
12253 rtx src = XEXP (pat, 1);
12254 rtx op0 = XEXP (src, 0);
12255 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12256 ? XEXP (src, 1) : NULL);
12258 if (rtx_equal_p (dst, op0)
12259 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12261 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12262 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12263 rtvec vec = gen_rtvec (2, pat, clobber);
12265 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12266 INSN_CODE (insn) = -1;
12268 /* We can also handle a commutative operation where the
12269 second operand matches the destination. */
12270 else if (op1 && rtx_equal_p (dst, op1))
12272 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12273 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12276 src = copy_rtx (src);
12277 XEXP (src, 0) = op1;
12278 XEXP (src, 1) = op0;
12279 pat = gen_rtx_SET (VOIDmode, dst, src);
12280 vec = gen_rtvec (2, pat, clobber);
12281 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12282 INSN_CODE (insn) = -1;
12287 if (NONDEBUG_INSN_P (insn))
12288 df_simulate_one_insn_backwards (bb, insn, &live);
12292 CLEAR_REG_SET (&live);
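/* For illustration (a sketch, not part of the original file): a set
   such as

	(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

   is rewritten by the loop above into

	(parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
	           (clobber (reg:CC CC_REGNUM))])

   so that output can use the 16-bit flag-setting "adds r0, r0, r1"
   encoding instead of the 32-bit "add.w r0, r0, r1". */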
12295 /* GCC puts the pool in the wrong place for ARM, since we can only
12296 load addresses a limited distance around the pc. We do some
12297 special munging to move the constant pool values to the correct
12298 point in the code. */
12303 HOST_WIDE_INT address = 0;
12309 minipool_fix_head = minipool_fix_tail = NULL;
12311 /* The first insn must always be a note, or the code below won't
12312 scan it properly. */
12313 insn = get_insns ();
12314 gcc_assert (GET_CODE (insn) == NOTE);
12317 /* Scan all the insns and record the operands that will need fixing. */
12318 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12320 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12321 && (arm_cirrus_insn_p (insn)
12322 || GET_CODE (insn) == JUMP_INSN
12323 || arm_memory_load_p (insn)))
12324 cirrus_reorg (insn);
12326 if (GET_CODE (insn) == BARRIER)
12327 push_minipool_barrier (insn, address);
12328 else if (INSN_P (insn))
12332 note_invalid_constants (insn, address, true);
12333 address += get_attr_length (insn);
12335 /* If the insn is a vector jump, add the size of the table
12336 and skip the table. */
12337 if ((table = is_jump_table (insn)) != NULL)
12339 address += get_jump_table_size (table);
12345 fix = minipool_fix_head;
12347 /* Now scan the fixups and perform the required changes. */
12352 Mfix * last_added_fix;
12353 Mfix * last_barrier = NULL;
12356 /* Skip any further barriers before the next fix. */
12357 while (fix && GET_CODE (fix->insn) == BARRIER)
12360 /* No more fixes. */
12364 last_added_fix = NULL;
12366 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12368 if (GET_CODE (ftmp->insn) == BARRIER)
12370 if (ftmp->address >= minipool_vector_head->max_address)
12373 last_barrier = ftmp;
12375 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12378 last_added_fix = ftmp; /* Keep track of the last fix added. */
12381 /* If we found a barrier, drop back to that; any fixes that we
12382 could have reached but come after the barrier will now go in
12383 the next mini-pool. */
12384 if (last_barrier != NULL)
12386 /* Reduce the refcount for those fixes that won't go into this
12387 pool after all. */
12388 for (fdel = last_barrier->next;
12389 fdel && fdel != ftmp;
12392 fdel->minipool->refcount--;
12393 fdel->minipool = NULL;
12396 ftmp = last_barrier;
12400 /* ftmp is the first fix that we can't fit into this pool and
12401 there are no natural barriers that we could use. Insert a
12402 new barrier in the code somewhere between the previous
12403 fix and this one, and arrange to jump around it. */
12404 HOST_WIDE_INT max_address;
12406 /* The last item on the list of fixes must be a barrier, so
12407 we can never run off the end of the list of fixes without
12408 last_barrier being set. */
12411 max_address = minipool_vector_head->max_address;
12412 /* Check that there isn't another fix that is in range that
12413 we couldn't fit into this pool because the pool was
12414 already too large: we need to put the pool before such an
12415 instruction. The pool itself may come just after the
12416 fix because create_fix_barrier also allows space for a
12417 jump instruction. */
12418 if (ftmp->address < max_address)
12419 max_address = ftmp->address + 1;
12421 last_barrier = create_fix_barrier (last_added_fix, max_address);
12424 assign_minipool_offsets (last_barrier);
12428 if (GET_CODE (ftmp->insn) != BARRIER
12429 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12436 /* Scan over the fixes we have identified for this pool, fixing them
12437 up and adding the constants to the pool itself. */
12438 for (this_fix = fix; this_fix && ftmp != this_fix;
12439 this_fix = this_fix->next)
12440 if (GET_CODE (this_fix->insn) != BARRIER)
12443 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12444 minipool_vector_label),
12445 this_fix->minipool->offset);
12446 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12449 dump_minipool (last_barrier->insn);
12453 /* From now on we must synthesize any constants that we can't handle
12454 directly. This can happen if the RTL gets split during final
12455 instruction generation. */
12456 after_arm_reorg = 1;
12458 /* Free the minipool memory. */
12459 obstack_free (&minipool_obstack, minipool_startobj);
12462 /* Routines to output assembly language. */
12464 /* If the rtx is the correct value then return the string of the number.
12465 In this way we can ensure that valid double constants are generated even
12466 when cross-compiling. */
12468 fp_immediate_constant (rtx x)
12473 if (!fp_consts_inited)
12476 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12477 for (i = 0; i < 8; i++)
12478 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12479 return strings_fp[i];
12481 gcc_unreachable ();
12484 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12485 static const char *
12486 fp_const_from_val (REAL_VALUE_TYPE *r)
12490 if (!fp_consts_inited)
12493 for (i = 0; i < 8; i++)
12494 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12495 return strings_fp[i];
12497 gcc_unreachable ();
12500 /* Output the operands of a LDM/STM instruction to STREAM.
12501 MASK is the ARM register set mask of which only bits 0-15 are important.
12502 REG is the base register, either the frame pointer or the stack pointer,
12503 INSTR is the possibly suffixed load or store instruction.
12504 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12507 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12508 unsigned long mask, int rfe)
12511 bool not_first = FALSE;
12513 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12514 fputc ('\t', stream);
12515 asm_fprintf (stream, instr, reg);
12516 fputc ('{', stream);
12518 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12519 if (mask & (1 << i))
12522 fprintf (stream, ", ");
12524 asm_fprintf (stream, "%r", i);
12529 fprintf (stream, "}^\n");
12531 fprintf (stream, "}\n");
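/* Sample output (illustrative): a mask with bits 4, 5 and PC_REGNUM set
   prints something like

	ldmfd	sp!, {r4, r5, pc}

   with a trailing "^" instead of "}" alone when RFE is nonzero (copying
   spsr to cpsr on the return). */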
12535 /* Output a FLDMD instruction to STREAM.
12536 BASE is the register containing the address.
12537 REG and COUNT specify the register range.
12538 Extra registers may be added to avoid hardware bugs.
12540 We output FLDMD even for ARMv5 VFP implementations. Although
12541 FLDMD is technically not supported until ARMv6, it is believed
12542 that all VFP implementations support its use in this context. */
12545 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12549 /* Workaround ARM10 VFPr1 bug. */
12550 if (count == 2 && !arm_arch6)
12557 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12558 load into multiple parts if we have to handle more than 16 registers. */
12561 vfp_output_fldmd (stream, base, reg, 16);
12562 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12566 fputc ('\t', stream);
12567 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12569 for (i = reg; i < reg + count; i++)
12572 fputs (", ", stream);
12573 asm_fprintf (stream, "d%d", i);
12575 fputs ("}\n", stream);
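/* Typical output (illustrative): base = SP_REGNUM, reg = 8, count = 2
   prints

	fldmfdd	sp!, {d8, d9}

   (on pre-ARMv6 cores the count == 2 case is first widened by the
   workaround above). */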
12580 /* Output the assembly for a store multiple. */
12583 vfp_output_fstmd (rtx * operands)
12590 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12591 p = strlen (pattern);
12593 gcc_assert (GET_CODE (operands[1]) == REG);
12595 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12596 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12598 p += sprintf (&pattern[p], ", d%d", base + i);
12600 strcpy (&pattern[p], "}");
12602 output_asm_insn (pattern, operands);
12607 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12608 number of bytes pushed. */
12611 vfp_emit_fstmd (int base_reg, int count)
12618 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12619 register pairs are stored by a store multiple insn. We avoid this
12620 by pushing an extra pair. */
12621 if (count == 2 && !arm_arch6)
12623 if (base_reg == LAST_VFP_REGNUM - 3)
12628 /* FSTMD may not store more than 16 doubleword registers at once. Split
12629 larger stores into multiple parts (up to a maximum of two, in
12630 practice). */
12634 /* NOTE: base_reg is an internal register number, so each D register
12635 counts as 2. */
12636 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12637 saved += vfp_emit_fstmd (base_reg, 16);
12641 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12642 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12644 reg = gen_rtx_REG (DFmode, base_reg);
12647 XVECEXP (par, 0, 0)
12648 = gen_rtx_SET (VOIDmode,
12651 gen_rtx_PRE_MODIFY (Pmode,
12654 (stack_pointer_rtx,
12657 gen_rtx_UNSPEC (BLKmode,
12658 gen_rtvec (1, reg),
12659 UNSPEC_PUSH_MULT));
12661 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12662 plus_constant (stack_pointer_rtx, -(count * 8)));
12663 RTX_FRAME_RELATED_P (tmp) = 1;
12664 XVECEXP (dwarf, 0, 0) = tmp;
12666 tmp = gen_rtx_SET (VOIDmode,
12667 gen_frame_mem (DFmode, stack_pointer_rtx),
12669 RTX_FRAME_RELATED_P (tmp) = 1;
12670 XVECEXP (dwarf, 0, 1) = tmp;
12672 for (i = 1; i < count; i++)
12674 reg = gen_rtx_REG (DFmode, base_reg);
12676 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12678 tmp = gen_rtx_SET (VOIDmode,
12679 gen_frame_mem (DFmode,
12680 plus_constant (stack_pointer_rtx,
12683 RTX_FRAME_RELATED_P (tmp) = 1;
12684 XVECEXP (dwarf, 0, i + 1) = tmp;
12687 par = emit_insn (par);
12688 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12689 RTX_FRAME_RELATED_P (par) = 1;
12694 /* Emit a call instruction with pattern PAT. ADDR is the address of
12695 the call target. */
12698 arm_emit_call_insn (rtx pat, rtx addr)
12702 insn = emit_call_insn (pat);
12704 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12705 If the call might use such an entry, add a use of the PIC register
12706 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12707 if (TARGET_VXWORKS_RTP
12709 && GET_CODE (addr) == SYMBOL_REF
12710 && (SYMBOL_REF_DECL (addr)
12711 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12712 : !SYMBOL_REF_LOCAL_P (addr)))
12714 require_pic_register ();
12715 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12719 /* Output a 'call' insn. */
12721 output_call (rtx *operands)
12723 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12725 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12726 if (REGNO (operands[0]) == LR_REGNUM)
12728 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12729 output_asm_insn ("mov%?\t%0, %|lr", operands);
12732 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12734 if (TARGET_INTERWORK || arm_arch4t)
12735 output_asm_insn ("bx%?\t%0", operands);
12737 output_asm_insn ("mov%?\t%|pc, %0", operands);
12742 /* Output a 'call' insn that is a reference in memory. This is
12743 disabled for ARMv5 and we prefer a blx instead because otherwise
12744 there's a significant performance overhead. */
12746 output_call_mem (rtx *operands)
12748 gcc_assert (!arm_arch5);
12749 if (TARGET_INTERWORK)
12751 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12752 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12753 output_asm_insn ("bx%?\t%|ip", operands);
12755 else if (regno_use_in (LR_REGNUM, operands[0]))
12757 /* LR is used in the memory address. We load the address in the
12758 first instruction. It's safe to use IP as the target of the
12759 load since the call will kill it anyway. */
12760 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12761 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12763 output_asm_insn ("bx%?\t%|ip", operands);
12765 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12769 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12770 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12777 /* Output a move from arm registers to an fpa register.
12778 OPERANDS[0] is an fpa register.
12779 OPERANDS[1] is the first register of an arm register pair. */
12781 output_mov_long_double_fpa_from_arm (rtx *operands)
12783 int arm_reg0 = REGNO (operands[1]);
12786 gcc_assert (arm_reg0 != IP_REGNUM);
12788 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12789 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12790 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12792 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12793 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12798 /* Output a move from an fpa register to arm registers.
12799 OPERANDS[0] is the first register of an arm register pair.
12800 OPERANDS[1] is an fpa register. */
12802 output_mov_long_double_arm_from_fpa (rtx *operands)
12804 int arm_reg0 = REGNO (operands[0]);
12807 gcc_assert (arm_reg0 != IP_REGNUM);
12809 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12810 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12811 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12813 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12814 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12818 /* Output a move from arm registers to arm registers of a long double.
12819 OPERANDS[0] is the destination.
12820 OPERANDS[1] is the source. */
12822 output_mov_long_double_arm_from_arm (rtx *operands)
12824 /* We have to be careful here because the two might overlap. */
12825 int dest_start = REGNO (operands[0]);
12826 int src_start = REGNO (operands[1]);
12830 if (dest_start < src_start)
12832 for (i = 0; i < 3; i++)
12834 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12835 ops[1] = gen_rtx_REG (SImode, src_start + i);
12836 output_asm_insn ("mov%?\t%0, %1", ops);
12841 for (i = 2; i >= 0; i--)
12843 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12844 ops[1] = gen_rtx_REG (SImode, src_start + i);
12845 output_asm_insn ("mov%?\t%0, %1", ops);
12853 arm_emit_movpair (rtx dest, rtx src)
12855 /* If the src is an immediate, simplify it. */
12856 if (CONST_INT_P (src))
12858 HOST_WIDE_INT val = INTVAL (src);
12859 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12860 if ((val >> 16) & 0x0000ffff)
12861 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12863 GEN_INT ((val >> 16) & 0x0000ffff));
12866 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12867 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
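/* Example (illustrative): for the immediate 0x12345678 this emits a
   movw of 0x5678 followed by a movt of 0x1234 (the ZERO_EXTRACT set
   above); when the high half is zero the movt is omitted. */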
12870 /* Output a move from arm registers to an fpa register.
12871 OPERANDS[0] is an fpa register.
12872 OPERANDS[1] is the first register of an arm register pair. */
12874 output_mov_double_fpa_from_arm (rtx *operands)
12876 int arm_reg0 = REGNO (operands[1]);
12879 gcc_assert (arm_reg0 != IP_REGNUM);
12881 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12882 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12883 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12884 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12888 /* Output a move from an fpa register to arm registers.
12889 OPERANDS[0] is the first register of an arm register pair.
12890 OPERANDS[1] is an fpa register. */
12892 output_mov_double_arm_from_fpa (rtx *operands)
12894 int arm_reg0 = REGNO (operands[0]);
12897 gcc_assert (arm_reg0 != IP_REGNUM);
12899 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12900 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12901 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12902 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12906 /* Output a move between double words. It must be REG<-MEM
12907 or MEM<-REG. */
12909 output_move_double (rtx *operands)
12911 enum rtx_code code0 = GET_CODE (operands[0]);
12912 enum rtx_code code1 = GET_CODE (operands[1]);
12917 unsigned int reg0 = REGNO (operands[0]);
12919 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12921 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12923 switch (GET_CODE (XEXP (operands[1], 0)))
12927 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
12928 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12930 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12934 gcc_assert (TARGET_LDRD);
12935 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12940 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12942 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12947 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12949 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12953 gcc_assert (TARGET_LDRD);
12954 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12959 /* Autoincrement addressing modes should never have overlapping
12960 base and destination registers, and overlapping index registers
12961 are already prohibited, so this doesn't need to worry about
12962 fix_cm3_ldrd. */
12963 otherops[0] = operands[0];
12964 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12965 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12967 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12969 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12971 /* Registers overlap so split out the increment. */
12972 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12973 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12977 /* Use a single insn if we can.
12978 FIXME: IWMMXT allows offsets larger than ldrd can
12979 handle, fix these up with a pair of ldr. */
12981 || GET_CODE (otherops[2]) != CONST_INT
12982 || (INTVAL (otherops[2]) > -256
12983 && INTVAL (otherops[2]) < 256))
12984 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12987 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12988 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12994 /* Use a single insn if we can.
12995 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12996 fix these up with a pair of ldr. */
12998 || GET_CODE (otherops[2]) != CONST_INT
12999 || (INTVAL (otherops[2]) > -256
13000 && INTVAL (otherops[2]) < 256))
13001 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13004 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13005 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13012 /* We might be able to use ldrd %0, %1 here. However the range is
13013 different to ldr/adr, and it is broken on some ARMv7-M
13014 implementations. */
13015 /* Use the second register of the pair to avoid problematic
13016 overlap. */
13017 otherops[1] = operands[1];
13018 output_asm_insn ("adr%?\t%0, %1", otherops);
13019 operands[1] = otherops[0];
13021 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13023 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13026 /* ??? This needs checking for thumb2. */
13028 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13029 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13031 otherops[0] = operands[0];
13032 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13033 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13035 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13037 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13039 switch ((int) INTVAL (otherops[2]))
13042 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13047 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13052 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13056 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
13057 operands[1] = otherops[0];
13059 && (GET_CODE (otherops[2]) == REG
13061 || (GET_CODE (otherops[2]) == CONST_INT
13062 && INTVAL (otherops[2]) > -256
13063 && INTVAL (otherops[2]) < 256)))
13065 if (reg_overlap_mentioned_p (operands[0],
13069 /* Swap base and index registers over to
13070 avoid a conflict. */
13072 otherops[1] = otherops[2];
13075 /* If both registers conflict, it will usually
13076 have been fixed by a splitter. */
13077 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13078 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13080 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13081 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13085 otherops[0] = operands[0];
13086 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13091 if (GET_CODE (otherops[2]) == CONST_INT)
13093 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13094 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13096 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13099 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13102 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13105 return "ldr%(d%)\t%0, [%1]";
13107 return "ldm%(ia%)\t%1, %M0";
13111 otherops[1] = adjust_address (operands[1], SImode, 4);
13112 /* Take care of overlapping base/data reg. */
13113 if (reg_mentioned_p (operands[0], operands[1]))
13115 output_asm_insn ("ldr%?\t%0, %1", otherops);
13116 output_asm_insn ("ldr%?\t%0, %1", operands);
13120 output_asm_insn ("ldr%?\t%0, %1", operands);
13121 output_asm_insn ("ldr%?\t%0, %1", otherops);
13128 /* Constraints should ensure this. */
13129 gcc_assert (code0 == MEM && code1 == REG);
13130 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13132 switch (GET_CODE (XEXP (operands[0], 0)))
13136 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13138 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13142 gcc_assert (TARGET_LDRD);
13143 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13148 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13150 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13155 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13157 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13161 gcc_assert (TARGET_LDRD);
13162 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13167 otherops[0] = operands[1];
13168 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13169 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13171 /* IWMMXT allows offsets larger than ldrd can handle,
13172 fix these up with a pair of ldr. */
13174 && GET_CODE (otherops[2]) == CONST_INT
13175 && (INTVAL (otherops[2]) <= -256
13176 || INTVAL (otherops[2]) >= 256))
13178 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13180 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13181 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13185 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13186 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13189 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13190 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13192 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13196 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13197 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13199 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13202 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13208 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13214 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13219 && (GET_CODE (otherops[2]) == REG
13221 || (GET_CODE (otherops[2]) == CONST_INT
13222 && INTVAL (otherops[2]) > -256
13223 && INTVAL (otherops[2]) < 256)))
13225 otherops[0] = operands[1];
13226 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13227 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13233 otherops[0] = adjust_address (operands[0], SImode, 4);
13234 otherops[1] = operands[1];
13235 output_asm_insn ("str%?\t%1, %0", operands);
13236 output_asm_insn ("str%?\t%H1, %0", otherops);
13243 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13244 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13247 output_move_quad (rtx *operands)
13249 if (REG_P (operands[0]))
13251 /* Load, or reg->reg move. */
13253 if (MEM_P (operands[1]))
13255 switch (GET_CODE (XEXP (operands[1], 0)))
13258 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13263 output_asm_insn ("adr%?\t%0, %1", operands);
13264 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13268 gcc_unreachable ();
13276 gcc_assert (REG_P (operands[1]));
13278 dest = REGNO (operands[0]);
13279 src = REGNO (operands[1]);
13281 /* This seems pretty dumb, but hopefully GCC won't try to do it
13282 very often. */
13284 for (i = 0; i < 4; i++)
13286 ops[0] = gen_rtx_REG (SImode, dest + i);
13287 ops[1] = gen_rtx_REG (SImode, src + i);
13288 output_asm_insn ("mov%?\t%0, %1", ops);
13291 for (i = 3; i >= 0; i--)
13293 ops[0] = gen_rtx_REG (SImode, dest + i);
13294 ops[1] = gen_rtx_REG (SImode, src + i);
13295 output_asm_insn ("mov%?\t%0, %1", ops);
13301 gcc_assert (MEM_P (operands[0]));
13302 gcc_assert (REG_P (operands[1]));
13303 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13305 switch (GET_CODE (XEXP (operands[0], 0)))
13308 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13312 gcc_unreachable ();
13319 /* Output a VFP load or store instruction. */
13322 output_move_vfp (rtx *operands)
13324 rtx reg, mem, addr, ops[2];
13325 int load = REG_P (operands[0]);
13326 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13327 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13330 enum machine_mode mode;
13332 reg = operands[!load];
13333 mem = operands[load];
13335 mode = GET_MODE (reg);
13337 gcc_assert (REG_P (reg));
13338 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13339 gcc_assert (mode == SFmode
13343 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13344 gcc_assert (MEM_P (mem));
13346 addr = XEXP (mem, 0);
13348 switch (GET_CODE (addr))
13351 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13352 ops[0] = XEXP (addr, 0);
13357 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13358 ops[0] = XEXP (addr, 0);
13363 templ = "f%s%c%%?\t%%%s0, %%1%s";
13369 sprintf (buff, templ,
13370 load ? "ld" : "st",
13373 integer_p ? "\t%@ int" : "");
13374 output_asm_insn (buff, ops);
13379 /* Output a Neon quad-word load or store, or a load or store for
13380 larger structure modes.
13382 WARNING: The ordering of elements is weird in big-endian mode,
13383 because we use VSTM, as required by the EABI. GCC RTL defines
13384 element ordering based on in-memory order. This can differ
13385 from the architectural ordering of elements within a NEON register.
13386 The intrinsics defined in arm_neon.h use the NEON register element
13387 ordering, not the GCC RTL element ordering.
13389 For example, the in-memory ordering of a big-endian quadword
13390 vector with 16-bit elements when stored from register pair {d0,d1}
13391 will be (lowest address first, d0[N] is NEON register element N):
13393 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13395 When necessary, quadword registers (dN, dN+1) are moved to ARM
13396 registers from rN in the order:
13398 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13400 So that STM/LDM can be used on vectors in ARM registers, and the
13401 same memory layout will result as if VSTM/VLDM were used. */
13404 output_move_neon (rtx *operands)
13406 rtx reg, mem, addr, ops[2];
13407 int regno, load = REG_P (operands[0]);
13410 enum machine_mode mode;
13412 reg = operands[!load];
13413 mem = operands[load];
13415 mode = GET_MODE (reg);
13417 gcc_assert (REG_P (reg));
13418 regno = REGNO (reg);
13419 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13420 || NEON_REGNO_OK_FOR_QUAD (regno));
13421 gcc_assert (VALID_NEON_DREG_MODE (mode)
13422 || VALID_NEON_QREG_MODE (mode)
13423 || VALID_NEON_STRUCT_MODE (mode));
13424 gcc_assert (MEM_P (mem));
13426 addr = XEXP (mem, 0);
13428 /* Strip off const from addresses like (const (plus (...))). */
13429 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13430 addr = XEXP (addr, 0);
13432 switch (GET_CODE (addr))
13435 templ = "v%smia%%?\t%%0!, %%h1";
13436 ops[0] = XEXP (addr, 0);
13441 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13442 templ = "v%smdb%%?\t%%0!, %%h1";
13443 ops[0] = XEXP (addr, 0);
13448 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13449 gcc_unreachable ();
13454 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13457 for (i = 0; i < nregs; i++)
13459 /* We're only using DImode here because it's a convenient size. */
13460 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13461 ops[1] = adjust_address (mem, DImode, 8 * i);
13462 if (reg_overlap_mentioned_p (ops[0], mem))
13464 gcc_assert (overlap == -1);
13469 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13470 output_asm_insn (buff, ops);
13475 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13476 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13477 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13478 output_asm_insn (buff, ops);
13485 templ = "v%smia%%?\t%%m0, %%h1";
13490 sprintf (buff, templ, load ? "ld" : "st");
13491 output_asm_insn (buff, ops);
13496 /* Compute and return the length of neon_mov<mode>, where <mode> is
13497 one of VSTRUCT modes: EI, OI, CI or XI. */
13499 arm_attr_length_move_neon (rtx insn)
13501 rtx reg, mem, addr;
13503 enum machine_mode mode;
13505 extract_insn_cached (insn);
13507 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13509 mode = GET_MODE (recog_data.operand[0]);
13520 gcc_unreachable ();
13524 load = REG_P (recog_data.operand[0]);
13525 reg = recog_data.operand[!load];
13526 mem = recog_data.operand[load];
13528 gcc_assert (MEM_P (mem));
13530 mode = GET_MODE (reg);
13531 addr = XEXP (mem, 0);
13533 /* Strip off const from addresses like (const (plus (...))). */
13534 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13535 addr = XEXP (addr, 0);
13537 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13539 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13546 /* Return nonzero if the offset in the address is an immediate. Otherwise, return FALSE.  */
13550 arm_address_offset_is_imm (rtx insn)
13554 extract_insn_cached (insn);
13556 if (REG_P (recog_data.operand[0]))
13559 mem = recog_data.operand[0];
13561 gcc_assert (MEM_P (mem));
13563 addr = XEXP (mem, 0);
13565 if (GET_CODE (addr) == REG
13566 || (GET_CODE (addr) == PLUS
13567 && GET_CODE (XEXP (addr, 0)) == REG
13568 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
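/* For example, addresses of the form [r3] or [r3, #12] satisfy the
   test above, while a register-offset address such as [r3, r4] does
   not (operands are illustrative).  */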
13574 /* Output an ADD r, s, #n where n may be too big for one instruction.
13575 If adding zero to one register, output nothing. */
13577 output_add_immediate (rtx *operands)
13579 HOST_WIDE_INT n = INTVAL (operands[2]);
13581 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13584 output_multi_immediate (operands,
13585 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13588 output_multi_immediate (operands,
13589 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13596 /* Output a multiple immediate operation.
13597 OPERANDS is the vector of operands referred to in the output patterns.
13598 INSTR1 is the output pattern to use for the first constant.
13599 INSTR2 is the output pattern to use for subsequent constants.
13600 IMMED_OP is the index of the constant slot in OPERANDS.
13601 N is the constant value. */
13602 static const char *
13603 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13604 int immed_op, HOST_WIDE_INT n)
13606 #if HOST_BITS_PER_WIDE_INT > 32
13612 /* Quick and easy output. */
13613 operands[immed_op] = const0_rtx;
13614 output_asm_insn (instr1, operands);
13619 const char * instr = instr1;
13621 /* Note that n is never zero here (which would give no output). */
13622 for (i = 0; i < 32; i += 2)
13626 operands[immed_op] = GEN_INT (n & (255 << i));
13627 output_asm_insn (instr, operands);
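/* A worked example (illustrative registers): for n = 0x00ff00ff the
   loop above selects the byte chunks 0x000000ff (i == 0) and
   0x00ff0000 (i == 16), both valid 8-bit-rotated ARM immediates, so
   the emitted sequence is

	add	r0, r1, #255		@ 0x000000ff
	add	r0, r0, #16711680	@ 0x00ff0000

   with INSTR1 used for the first chunk and INSTR2 for the rest.  */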
13637 /* Return the name of a shifter operation. */
13638 static const char *
13639 arm_shift_nmem(enum rtx_code code)
13644 return ARM_LSL_NAME;
13660 /* Return the appropriate ARM instruction for the operation code.
13661 The returned result should not be overwritten. OP is the rtx of the
13662 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13665 arithmetic_instr (rtx op, int shift_first_arg)
13667 switch (GET_CODE (op))
13673 return shift_first_arg ? "rsb" : "sub";
13688 return arm_shift_nmem(GET_CODE(op));
13691 gcc_unreachable ();
13695 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13696 for the operation code. The returned result should not be overwritten.
13697 OP is the rtx code of the shift.
13698 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant shift amount otherwise.  */
13700 static const char *
13701 shift_op (rtx op, HOST_WIDE_INT *amountp)
13704 enum rtx_code code = GET_CODE (op);
13706 switch (GET_CODE (XEXP (op, 1)))
13714 *amountp = INTVAL (XEXP (op, 1));
13718 gcc_unreachable ();
13724 gcc_assert (*amountp != -1);
13725 *amountp = 32 - *amountp;
13728 /* Fall through. */
13734 mnem = arm_shift_nmem(code);
13738 /* We never have to worry about the amount being other than a
13739 power of 2, since this case can never be reloaded from a reg. */
13740 gcc_assert (*amountp != -1);
13741 *amountp = int_log2 (*amountp);
13742 return ARM_LSL_NAME;
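/* For instance, a (mult x 8) operand arrives here with amount 8,
   which int_log2 converts to 3, and is emitted as lsl #3.  */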
13745 gcc_unreachable ();
13748 if (*amountp != -1)
13750 /* This is not 100% correct, but follows from the desire to merge
13751 multiplication by a power of 2 with the recognizer for a
13752 shift. >=32 is not a valid shift for "lsl", so we must try and
13753 output a shift that produces the correct arithmetical result.
13754 Using lsr #32 is identical except for the fact that the carry bit
13755 is not set correctly if we set the flags; but we never use the
13756 carry bit from such an operation, so we can ignore that. */
13757 if (code == ROTATERT)
13758 /* Rotate is just modulo 32. */
13760 else if (*amountp != (*amountp & 31))
13762 if (code == ASHIFT)
13767 /* Shifts of 0 are no-ops. */
13775 /* Obtain the shift from the POWER of two. */
13777 static HOST_WIDE_INT
13778 int_log2 (HOST_WIDE_INT power)
13780 HOST_WIDE_INT shift = 0;
13782 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13784 gcc_assert (shift <= 31);
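/* E.g. int_log2 (8) returns 3, the index of the single set bit.  */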
13791 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13792 because /bin/as is horribly restrictive. The judgement about
13793 whether or not each character is 'printable' (and can be output as
13794 is) or not (and must be printed with an octal escape) must be made
13795 with reference to the *host* character set -- the situation is
13796 similar to that discussed in the comments above pp_c_char in
13797 c-pretty-print.c. */
13799 #define MAX_ASCII_LEN 51
13802 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13805 int len_so_far = 0;
13807 fputs ("\t.ascii\t\"", stream);
13809 for (i = 0; i < len; i++)
13813 if (len_so_far >= MAX_ASCII_LEN)
13815 fputs ("\"\n\t.ascii\t\"", stream);
13821 if (c == '\\' || c == '\"')
13823 putc ('\\', stream);
13831 fprintf (stream, "\\%03o", c);
13836 fputs ("\"\n", stream);
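/* For example, the input bytes 'h', 'i', '"', 0x07 come out as

	.ascii	"hi\"\007"

   with the quote backslash-escaped and the unprintable byte written
   in octal.  */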
13839 /* Compute the register save mask for registers 0 through 12
13840 inclusive. This code is used by arm_compute_save_reg_mask. */
13842 static unsigned long
13843 arm_compute_save_reg0_reg12_mask (void)
13845 unsigned long func_type = arm_current_func_type ();
13846 unsigned long save_reg_mask = 0;
13849 if (IS_INTERRUPT (func_type))
13851 unsigned int max_reg;
13852 /* Interrupt functions must not corrupt any registers,
13853 even call clobbered ones. If this is a leaf function
13854 we can just examine the registers used by the RTL, but
13855 otherwise we have to assume that whatever function is
13856 called might clobber anything, and so we have to save
13857 all the call-clobbered registers as well. */
13858 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13859 /* FIQ handlers have registers r8 - r12 banked, so
13860 we only need to check r0 - r7. Normal ISRs only
13861 bank r14 and r15, so we must check up to r12.
13862 r13 is the stack pointer which is always preserved,
13863 so we do not need to consider it here. */
13868 for (reg = 0; reg <= max_reg; reg++)
13869 if (df_regs_ever_live_p (reg)
13870 || (! current_function_is_leaf && call_used_regs[reg]))
13871 save_reg_mask |= (1 << reg);
13873 /* Also save the pic base register if necessary. */
13875 && !TARGET_SINGLE_PIC_BASE
13876 && arm_pic_register != INVALID_REGNUM
13877 && crtl->uses_pic_offset_table)
13878 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13880 else if (IS_VOLATILE(func_type))
13882 /* For noreturn functions we historically omitted register saves
13883 altogether. However this really messes up debugging. As a
13884 compromise save just the frame pointers. Combined with the link
13885 register saved elsewhere this should be sufficient to get a backtrace.  */
13887 if (frame_pointer_needed)
13888 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13889 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13890 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13891 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13892 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13896 /* In the normal case we only need to save those registers
13897 which are call saved and which are used by this function. */
13898 for (reg = 0; reg <= 11; reg++)
13899 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13900 save_reg_mask |= (1 << reg);
13902 /* Handle the frame pointer as a special case. */
13903 if (frame_pointer_needed)
13904 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13906 /* If we aren't loading the PIC register,
13907 don't stack it even though it may be live. */
13909 && !TARGET_SINGLE_PIC_BASE
13910 && arm_pic_register != INVALID_REGNUM
13911 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13912 || crtl->uses_pic_offset_table))
13913 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13915 /* The prologue will copy SP into R0, so save it. */
13916 if (IS_STACKALIGN (func_type))
13917 save_reg_mask |= 1;
13920 /* Save registers so the exception handler can modify them. */
13921 if (crtl->calls_eh_return)
13927 reg = EH_RETURN_DATA_REGNO (i);
13928 if (reg == INVALID_REGNUM)
13930 save_reg_mask |= 1 << reg;
13934 return save_reg_mask;
13938 /* Compute the number of bytes used to store the static chain register on the
13939 stack, above the stack frame. We need to know this accurately to get the
13940 alignment of the rest of the stack frame correct. */
13942 static int
arm_compute_static_chain_stack_bytes (void)
13944 unsigned long func_type = arm_current_func_type ();
13945 int static_chain_stack_bytes = 0;
13947 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13948 && IS_NESTED (func_type)
13949 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13950 static_chain_stack_bytes = 4;
13952 return static_chain_stack_bytes;
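/* To illustrate: an ARM-mode nested function with an APCS frame,
   r3 live on entry and no pretend args reserves these 4 bytes so the
   prologue can park IP on the stack above the frame (see
   arm_expand_prologue).  */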
13956 /* Compute a bit mask of which registers need to be
13957 saved on the stack for the current function.
13958 This is used by arm_get_frame_offsets, which may add extra registers. */
13960 static unsigned long
13961 arm_compute_save_reg_mask (void)
13963 unsigned int save_reg_mask = 0;
13964 unsigned long func_type = arm_current_func_type ();
13967 if (IS_NAKED (func_type))
13968 /* This should never really happen. */
13971 /* If we are creating a stack frame, then we must save the frame pointer,
13972 IP (which will hold the old stack pointer), LR and the PC. */
13973 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13975 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13978 | (1 << PC_REGNUM);
13980 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13982 /* Decide if we need to save the link register.
13983 Interrupt routines have their own banked link register,
13984 so they never need to save it.
13985 Otherwise if we do not use the link register we do not need to save
13986 it. If we are pushing other registers onto the stack however, we
13987 can save an instruction in the epilogue by pushing the link register
13988 now and then popping it back into the PC. This incurs extra memory
13989 accesses though, so we only do it when optimizing for size, and only
13990 if we know that we will not need a fancy return sequence. */
13991 if (df_regs_ever_live_p (LR_REGNUM)
13994 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13995 && !crtl->calls_eh_return))
13996 save_reg_mask |= 1 << LR_REGNUM;
13998 if (cfun->machine->lr_save_eliminated)
13999 save_reg_mask &= ~ (1 << LR_REGNUM);
14001 if (TARGET_REALLY_IWMMXT
14002 && ((bit_count (save_reg_mask)
14003 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14004 arm_compute_static_chain_stack_bytes())
14007 /* The total number of registers that are going to be pushed
14008 onto the stack is odd. We need to ensure that the stack
14009 is 64-bit aligned before we start to save iWMMXt registers,
14010 and also before we start to create locals. (A local variable
14011 might be a double or long long which we will load/store using
14012 an iWMMXt instruction). Therefore we need to push another
14013 ARM register, so that the stack will be 64-bit aligned. We
14014 try to avoid using the arg registers (r0 - r3) as they might be
14015 used to pass values in a tail call. */
14016 for (reg = 4; reg <= 12; reg++)
14017 if ((save_reg_mask & (1 << reg)) == 0)
14021 save_reg_mask |= (1 << reg);
14024 cfun->machine->sibcall_blocked = 1;
14025 save_reg_mask |= (1 << 3);
14029 /* We may need to push an additional register for use initializing the
14030 PIC base register. */
14031 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14032 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14034 reg = thumb_find_work_register (1 << 4);
14035 if (!call_used_regs[reg])
14036 save_reg_mask |= (1 << reg);
14039 return save_reg_mask;
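/* As an example of the iWMMXt alignment adjustment above: if only
   {r4, r5, lr} were to be pushed with no pretend args, the word count
   would be odd (3), so r6 (the first free register in the scan) is
   added to keep the core save area a multiple of 8 bytes.  */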
14043 /* Compute a bit mask of which registers need to be
14044 saved on the stack for the current function. */
14045 static unsigned long
14046 thumb1_compute_save_reg_mask (void)
14048 unsigned long mask;
14052 for (reg = 0; reg < 12; reg ++)
14053 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14057 && !TARGET_SINGLE_PIC_BASE
14058 && arm_pic_register != INVALID_REGNUM
14059 && crtl->uses_pic_offset_table)
14060 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14062 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14063 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14064 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14066 /* LR will also be pushed if any lo regs are pushed. */
14067 if (mask & 0xff || thumb_force_lr_save ())
14068 mask |= (1 << LR_REGNUM);
14070 /* Make sure we have a low work register if we need one.
14071 We will need one if we are going to push a high register,
14072 but we are not currently intending to push a low register. */
14073 if ((mask & 0xff) == 0
14074 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14076 /* Use thumb_find_work_register to choose which register
14077 we will use. If the register is live then we will
14078 have to push it. Use LAST_LO_REGNUM as our fallback
14079 choice for the register to select. */
14080 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14081 /* Make sure the register returned by thumb_find_work_register is
14082 not part of the return value. */
14083 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14084 reg = LAST_LO_REGNUM;
14086 if (! call_used_regs[reg])
14090 /* The 504 below is 8 bytes less than 512 because there are two possible
14091 alignment words. We can't tell here if they will be present or not so we
14092 have to play it safe and assume that they are. */
14093 if ((CALLER_INTERWORKING_SLOT_SIZE +
14094 ROUND_UP_WORD (get_frame_size ()) +
14095 crtl->outgoing_args_size) >= 504)
14097 /* This is the same as the code in thumb1_expand_prologue() which
14098 determines which register to use for stack decrement. */
14099 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14100 if (mask & (1 << reg))
14103 if (reg > LAST_LO_REGNUM)
14105 /* Make sure we have a register available for stack decrement. */
14106 mask |= 1 << LAST_LO_REGNUM;
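/* For instance, a frame of 600 bytes exceeds the 504-byte bound
   above, so if none of r4-r7 is already in MASK, r7 is pushed purely
   so the prologue has a low register for the stack decrement.  */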
14114 /* Return the number of bytes required to save VFP registers. */
14116 arm_get_vfp_saved_size (void)
14118 unsigned int regno;
14123 /* Space for saved VFP registers. */
14124 if (TARGET_HARD_FLOAT && TARGET_VFP)
14127 for (regno = FIRST_VFP_REGNUM;
14128 regno < LAST_VFP_REGNUM;
14131 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14132 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14136 /* Workaround ARM10 VFPr1 bug. */
14137 if (count == 2 && !arm_arch6)
14139 saved += count * 8;
14148 if (count == 2 && !arm_arch6)
14150 saved += count * 8;
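/* By way of example, four consecutive live D registers contribute
   4 * 8 == 32 bytes, while on a pre-ARMv6 core a run of exactly two
   D registers is rounded up by the ARM10 VFPr1 workaround above and
   so occupies 24 bytes rather than 16.  */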
14157 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14158 everything bar the final return instruction. */
14160 output_return_instruction (rtx operand, int really_return, int reverse)
14162 char conditional[10];
14165 unsigned long live_regs_mask;
14166 unsigned long func_type;
14167 arm_stack_offsets *offsets;
14169 func_type = arm_current_func_type ();
14171 if (IS_NAKED (func_type))
14174 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14176 /* If this function was declared non-returning, and we have
14177 found a tail call, then we have to trust that the called
14178 function won't return. */
14183 /* Otherwise, trap an attempted return by aborting. */
14185 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14187 assemble_external_libcall (ops[1]);
14188 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14194 gcc_assert (!cfun->calls_alloca || really_return);
14196 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14198 cfun->machine->return_used_this_function = 1;
14200 offsets = arm_get_frame_offsets ();
14201 live_regs_mask = offsets->saved_regs_mask;
14203 if (live_regs_mask)
14205 const char * return_reg;
14207 /* If we do not have any special requirements for function exit
14208 (e.g. interworking) then we can load the return address
14209 directly into the PC. Otherwise we must load it into LR. */
14211 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14212 return_reg = reg_names[PC_REGNUM];
14214 return_reg = reg_names[LR_REGNUM];
14216 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14218 /* There are three possible reasons for the IP register
14219 being saved. 1) a stack frame was created, in which case
14220 IP contains the old stack pointer, or 2) an ISR routine
14221 corrupted it, or 3) it was saved to align the stack on
14222 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP.  */
14224 if (frame_pointer_needed)
14226 live_regs_mask &= ~ (1 << IP_REGNUM);
14227 live_regs_mask |= (1 << SP_REGNUM);
14230 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14233 /* On some ARM architectures it is faster to use LDR rather than
14234 LDM to load a single register. On other architectures, the
14235 cost is the same. In 26 bit mode, or for exception handlers,
14236 we have to use LDM to load the PC so that the CPSR is also restored.  */
14238 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14239 if (live_regs_mask == (1U << reg))
14242 if (reg <= LAST_ARM_REGNUM
14243 && (reg != LR_REGNUM
14245 || ! IS_INTERRUPT (func_type)))
14247 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14248 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14255 /* Generate the load multiple instruction to restore the
14256 registers. Note we can get here, even if
14257 frame_pointer_needed is true, but only if sp already
14258 points to the base of the saved core registers. */
14259 if (live_regs_mask & (1 << SP_REGNUM))
14261 unsigned HOST_WIDE_INT stack_adjust;
14263 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14264 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14266 if (stack_adjust && arm_arch5 && TARGET_ARM)
14267 if (TARGET_UNIFIED_ASM)
14268 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14270 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14273 /* If we can't use ldmib (SA110 bug),
14274 then try to pop r3 instead. */
14276 live_regs_mask |= 1 << 3;
14278 if (TARGET_UNIFIED_ASM)
14279 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14281 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14285 if (TARGET_UNIFIED_ASM)
14286 sprintf (instr, "pop%s\t{", conditional);
14288 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14290 p = instr + strlen (instr);
14292 for (reg = 0; reg <= SP_REGNUM; reg++)
14293 if (live_regs_mask & (1 << reg))
14295 int l = strlen (reg_names[reg]);
14301 memcpy (p, ", ", 2);
14305 memcpy (p, "%|", 2);
14306 memcpy (p + 2, reg_names[reg], l);
14310 if (live_regs_mask & (1 << LR_REGNUM))
14312 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14313 /* If returning from an interrupt, restore the CPSR. */
14314 if (IS_INTERRUPT (func_type))
14321 output_asm_insn (instr, & operand);
14323 /* See if we need to generate an extra instruction to
14324 perform the actual function return. */
14326 && func_type != ARM_FT_INTERWORKED
14327 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14329 /* The return has already been handled
14330 by loading the LR into the PC. */
14337 switch ((int) ARM_FUNC_TYPE (func_type))
14341 /* ??? This is wrong for unified assembly syntax. */
14342 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14345 case ARM_FT_INTERWORKED:
14346 sprintf (instr, "bx%s\t%%|lr", conditional);
14349 case ARM_FT_EXCEPTION:
14350 /* ??? This is wrong for unified assembly syntax. */
14351 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14355 /* Use bx if it's available. */
14356 if (arm_arch5 || arm_arch4t)
14357 sprintf (instr, "bx%s\t%%|lr", conditional);
14359 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14363 output_asm_insn (instr, & operand);
14369 /* Write the function name into the code section, directly preceding
14370 the function prologue.
14372 Code will be output similar to this:
14374 .ascii "arm_poke_function_name", 0
14377 .word 0xff000000 + (t1 - t0)
14378 arm_poke_function_name
14380 stmfd sp!, {fp, ip, lr, pc}
14383 When performing a stack backtrace, code can inspect the value
14384 of 'pc' stored at 'fp' + 0. If the trace function then looks
14385 at location pc - 12 and the top 8 bits are set, then we know
14386 that there is a function name embedded immediately preceding this
14387 location, and that it has length ((pc[-3]) & 0xff000000).
14389 We assume that pc is declared as a pointer to an unsigned long.
14391 It is of no benefit to output the function name if we are assembling
14392 a leaf function. These function types will not contain a stack
14393 backtrace structure, therefore it is not possible to determine the function name.  */
14396 arm_poke_function_name (FILE *stream, const char *name)
14398 unsigned long alignlength;
14399 unsigned long length;
14402 length = strlen (name) + 1;
14403 alignlength = ROUND_UP_WORD (length);
14405 ASM_OUTPUT_ASCII (stream, name, length);
14406 ASM_OUTPUT_ALIGN (stream, 2);
14407 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14408 assemble_aligned_integer (UNITS_PER_WORD, x);
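/* For a function named "my_func", LENGTH is 8 (seven characters plus
   the NUL) and ALIGNLENGTH rounds to 8, so following the sketch above
   the output is roughly:

	.ascii	"my_func", 0
	.align	2
	.word	0xff000008

   i.e. 0xff000000 plus the padded name length.  */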
14411 /* Place some comments into the assembler stream
14412 describing the current function. */
14414 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14416 unsigned long func_type;
14420 thumb1_output_function_prologue (f, frame_size);
14424 /* Sanity check. */
14425 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14427 func_type = arm_current_func_type ();
14429 switch ((int) ARM_FUNC_TYPE (func_type))
14432 case ARM_FT_NORMAL:
14434 case ARM_FT_INTERWORKED:
14435 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14438 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14441 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14443 case ARM_FT_EXCEPTION:
14444 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14448 if (IS_NAKED (func_type))
14449 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14451 if (IS_VOLATILE (func_type))
14452 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14454 if (IS_NESTED (func_type))
14455 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14456 if (IS_STACKALIGN (func_type))
14457 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14459 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14461 crtl->args.pretend_args_size, frame_size);
14463 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14464 frame_pointer_needed,
14465 cfun->machine->uses_anonymous_args);
14467 if (cfun->machine->lr_save_eliminated)
14468 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14470 if (crtl->calls_eh_return)
14471 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14476 arm_output_epilogue (rtx sibling)
14479 unsigned long saved_regs_mask;
14480 unsigned long func_type;
14481 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14482 frame that is $fp + 4 for a non-variadic function. */
14483 int floats_offset = 0;
14485 FILE * f = asm_out_file;
14486 unsigned int lrm_count = 0;
14487 int really_return = (sibling == NULL);
14489 arm_stack_offsets *offsets;
14491 /* If we have already generated the return instruction
14492 then it is futile to generate anything else. */
14493 if (use_return_insn (FALSE, sibling)
14494 && (cfun->machine->return_used_this_function != 0))
14497 func_type = arm_current_func_type ();
14499 if (IS_NAKED (func_type))
14500 /* Naked functions don't have epilogues. */
14503 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14507 /* A volatile function should never return. Call abort. */
14508 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14509 assemble_external_libcall (op);
14510 output_asm_insn ("bl\t%a0", &op);
14515 /* If we are throwing an exception, then we really must be doing a
14516 return, so we can't tail-call. */
14517 gcc_assert (!crtl->calls_eh_return || really_return);
14519 offsets = arm_get_frame_offsets ();
14520 saved_regs_mask = offsets->saved_regs_mask;
14523 lrm_count = bit_count (saved_regs_mask);
14525 floats_offset = offsets->saved_args;
14526 /* Compute how far away the floats will be. */
14527 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14528 if (saved_regs_mask & (1 << reg))
14529 floats_offset += 4;
14531 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14533 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14534 int vfp_offset = offsets->frame;
14536 if (TARGET_FPA_EMU2)
14538 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14539 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14541 floats_offset += 12;
14542 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14543 reg, FP_REGNUM, floats_offset - vfp_offset);
14548 start_reg = LAST_FPA_REGNUM;
14550 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14552 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14554 floats_offset += 12;
14556 /* We can't unstack more than four registers at once. */
14557 if (start_reg - reg == 3)
14559 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14560 reg, FP_REGNUM, floats_offset - vfp_offset);
14561 start_reg = reg - 1;
14566 if (reg != start_reg)
14567 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14568 reg + 1, start_reg - reg,
14569 FP_REGNUM, floats_offset - vfp_offset);
14570 start_reg = reg - 1;
14574 /* Just in case the last register checked also needs unstacking. */
14575 if (reg != start_reg)
14576 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14577 reg + 1, start_reg - reg,
14578 FP_REGNUM, floats_offset - vfp_offset);
14581 if (TARGET_HARD_FLOAT && TARGET_VFP)
14585 /* The fldmd insns do not have base+offset addressing
14586 modes, so we use IP to hold the address. */
14587 saved_size = arm_get_vfp_saved_size ();
14589 if (saved_size > 0)
14591 floats_offset += saved_size;
14592 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14593 FP_REGNUM, floats_offset - vfp_offset);
14595 start_reg = FIRST_VFP_REGNUM;
14596 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14598 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14599 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14601 if (start_reg != reg)
14602 vfp_output_fldmd (f, IP_REGNUM,
14603 (start_reg - FIRST_VFP_REGNUM) / 2,
14604 (reg - start_reg) / 2);
14605 start_reg = reg + 2;
14608 if (start_reg != reg)
14609 vfp_output_fldmd (f, IP_REGNUM,
14610 (start_reg - FIRST_VFP_REGNUM) / 2,
14611 (reg - start_reg) / 2);
14616 /* The frame pointer is guaranteed to be non-double-word aligned.
14617 This is because it is set to (old_stack_pointer - 4) and the
14618 old_stack_pointer was double word aligned. Thus the offset to
14619 the iWMMXt registers to be loaded must also be non-double-word
14620 sized, so that the resultant address *is* double-word aligned.
14621 We can ignore floats_offset since that was already included in
14622 the live_regs_mask. */
14623 lrm_count += (lrm_count % 2 ? 2 : 1);
14625 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14626 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14628 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14629 reg, FP_REGNUM, lrm_count * 4);
14634 /* saved_regs_mask should contain the IP, which at the time of stack
14635 frame generation actually contains the old stack pointer. So a
14636 quick way to unwind the stack is just pop the IP register directly
14637 into the stack pointer. */
14638 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14639 saved_regs_mask &= ~ (1 << IP_REGNUM);
14640 saved_regs_mask |= (1 << SP_REGNUM);
14642 /* There are two registers left in saved_regs_mask - LR and PC. We
14643 only need to restore the LR register (the return address), but to
14644 save time we can load it directly into the PC, unless we need a
14645 special function exit sequence, or we are not really returning. */
14647 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14648 && !crtl->calls_eh_return)
14649 /* Delete the LR from the register mask, so that the LR on
14650 the stack is loaded into the PC in the register mask. */
14651 saved_regs_mask &= ~ (1 << LR_REGNUM);
14653 saved_regs_mask &= ~ (1 << PC_REGNUM);
14655 /* We must use SP as the base register, because SP is one of the
14656 registers being restored. If an interrupt or page fault
14657 happens in the ldm instruction, the SP might or might not
14658 have been restored. That would be bad, as then SP will no
14659 longer indicate the safe area of stack, and we can get stack
14660 corruption. Using SP as the base register means that it will
14661 be reset correctly to the original value, should an interrupt
14662 occur. If the stack pointer already points at the right
14663 place, then omit the subtraction. */
14664 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14665 || cfun->calls_alloca)
14666 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14667 4 * bit_count (saved_regs_mask));
14668 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14670 if (IS_INTERRUPT (func_type))
14671 /* Interrupt handlers will have pushed the
14672 IP onto the stack, so restore it now. */
14673 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14677 /* This branch is executed for ARM mode (non-apcs frames) and
14678 Thumb-2 mode. Frame layout is essentially the same for those
14679 cases, except that in ARM mode frame pointer points to the
14680 first saved register, while in Thumb-2 mode the frame pointer points
14681 to the last saved register.
14683 It is possible to make frame pointer point to last saved
14684 register in both cases, and remove some conditionals below.
14685 That means that fp setup in prologue would be just "mov fp, sp"
14686 and sp restore in epilogue would be just "mov sp, fp", whereas
14687 now we have to use add/sub in those cases. However, the value
14688 of that would be marginal, as both mov and add/sub are 32-bit
14689 in ARM mode, and it would require extra conditionals
14690 in arm_expand_prologue to distinguish ARM-apcs-frame case
14691 (where frame pointer is required to point at first register)
14692 and ARM-non-apcs-frame. Therefore, such change is postponed
14693 until real need arise. */
14694 unsigned HOST_WIDE_INT amount;
14696 /* Restore stack pointer if necessary. */
14697 if (TARGET_ARM && frame_pointer_needed)
14699 operands[0] = stack_pointer_rtx;
14700 operands[1] = hard_frame_pointer_rtx;
14702 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14703 output_add_immediate (operands);
14707 if (frame_pointer_needed)
14709 /* For Thumb-2 restore sp from the frame pointer.
14710 Operand restrictions mean we have to increment FP, then copy
14712 amount = offsets->locals_base - offsets->saved_regs;
14713 operands[0] = hard_frame_pointer_rtx;
14717 unsigned long count;
14718 operands[0] = stack_pointer_rtx;
14719 amount = offsets->outgoing_args - offsets->saved_regs;
14720 /* Pop call clobbered registers if it avoids a
14721 separate stack adjustment. */
14722 count = offsets->saved_regs - offsets->saved_args;
14725 && !crtl->calls_eh_return
14726 && bit_count(saved_regs_mask) * 4 == count
14727 && !IS_INTERRUPT (func_type)
14728 && !crtl->tail_call_emit)
14730 unsigned long mask;
14731 /* Preserve return values, of any size. */
14732 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
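/* E.g. a 16-byte value returned in r0-r3 gives (16 + 3) / 4 == 4,
   so mask is 0xf at this point.  */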
14734 mask &= ~saved_regs_mask;
14736 while (bit_count (mask) * 4 > amount)
14738 while ((mask & (1 << reg)) == 0)
14740 mask &= ~(1 << reg);
14742 if (bit_count (mask) * 4 == amount)
{
14744 saved_regs_mask |= mask;
14751 operands[1] = operands[0];
14752 operands[2] = GEN_INT (amount);
14753 output_add_immediate (operands);
14755 if (frame_pointer_needed)
14756 asm_fprintf (f, "\tmov\t%r, %r\n",
14757 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14760 if (TARGET_FPA_EMU2)
14762 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14763 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14764 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14769 start_reg = FIRST_FPA_REGNUM;
14771 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14773 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14775 if (reg - start_reg == 3)
14777 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14778 start_reg, SP_REGNUM);
14779 start_reg = reg + 1;
14784 if (reg != start_reg)
14785 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14786 start_reg, reg - start_reg,
14789 start_reg = reg + 1;
14793 /* Just in case the last register checked also needs unstacking. */
14794 if (reg != start_reg)
14795 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14796 start_reg, reg - start_reg, SP_REGNUM);
14799 if (TARGET_HARD_FLOAT && TARGET_VFP)
14801 int end_reg = LAST_VFP_REGNUM + 1;
14803 /* Scan the registers in reverse order. We need to match
14804 any groupings made in the prologue and generate matching
14806 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14808 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14809 && (!df_regs_ever_live_p (reg + 1)
14810 || call_used_regs[reg + 1]))
14812 if (end_reg > reg + 2)
14813 vfp_output_fldmd (f, SP_REGNUM,
14814 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14815 (end_reg - (reg + 2)) / 2);
14819 if (end_reg > reg + 2)
14820 vfp_output_fldmd (f, SP_REGNUM, 0,
14821 (end_reg - (reg + 2)) / 2);
14825 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14826 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14827 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14829 /* If we can, restore the LR into the PC. */
14830 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14831 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14832 && !IS_STACKALIGN (func_type)
14834 && crtl->args.pretend_args_size == 0
14835 && saved_regs_mask & (1 << LR_REGNUM)
14836 && !crtl->calls_eh_return)
14838 saved_regs_mask &= ~ (1 << LR_REGNUM);
14839 saved_regs_mask |= (1 << PC_REGNUM);
14840 rfe = IS_INTERRUPT (func_type);
14845 /* Load the registers off the stack. If we only have one register
14846 to load use the LDR instruction - it is faster. For Thumb-2
14847 always use pop and the assembler will pick the best instruction.  */
14848 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14849 && !IS_INTERRUPT(func_type))
14851 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14853 else if (saved_regs_mask)
14855 if (saved_regs_mask & (1 << SP_REGNUM))
14856 /* Note - write back to the stack register is not enabled
14857 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14858 in the list of registers and if we add writeback the
14859 instruction becomes UNPREDICTABLE. */
14860 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14862 else if (TARGET_ARM)
14863 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14866 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14869 if (crtl->args.pretend_args_size)
14871 /* Unwind the pre-pushed regs. */
14872 operands[0] = operands[1] = stack_pointer_rtx;
14873 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14874 output_add_immediate (operands);
14878 /* We may have already restored PC directly from the stack. */
14879 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14882 /* Stack adjustment for exception handler. */
14883 if (crtl->calls_eh_return)
14884 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14885 ARM_EH_STACKADJ_REGNUM);
14887 /* Generate the return instruction. */
14888 switch ((int) ARM_FUNC_TYPE (func_type))
14892 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14895 case ARM_FT_EXCEPTION:
14896 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14899 case ARM_FT_INTERWORKED:
14900 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14904 if (IS_STACKALIGN (func_type))
14906 /* See comment in arm_expand_prologue. */
14907 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14909 if (arm_arch5 || arm_arch4t)
14910 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14912 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14920 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14921 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14923 arm_stack_offsets *offsets;
14929 /* Emit any call-via-reg trampolines that are needed for v4t support
14930 of call_reg and call_value_reg type insns. */
14931 for (regno = 0; regno < LR_REGNUM; regno++)
14933 rtx label = cfun->machine->call_via[regno];
14937 switch_to_section (function_section (current_function_decl));
14938 targetm.asm_out.internal_label (asm_out_file, "L",
14939 CODE_LABEL_NUMBER (label));
14940 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14944 /* ??? Probably not safe to set this here, since it assumes that a
14945 function will be emitted as assembly immediately after we generate
14946 RTL for it. This does not happen for inline functions. */
14947 cfun->machine->return_used_this_function = 0;
14949 else /* TARGET_32BIT */
14951 /* We need to take into account any stack-frame rounding. */
14952 offsets = arm_get_frame_offsets ();
14954 gcc_assert (!use_return_insn (FALSE, NULL)
14955 || (cfun->machine->return_used_this_function != 0)
14956 || offsets->saved_regs == offsets->outgoing_args
14957 || frame_pointer_needed);
14959 /* Reset the ARM-specific per-function variables. */
14960 after_arm_reorg = 0;
14964 /* Generate and emit an insn that we will recognize as a push_multi.
14965 Unfortunately, since this insn does not reflect very well the actual
14966 semantics of the operation, we need to annotate the insn for the benefit
14967 of DWARF2 frame unwind information. */
14969 emit_multi_reg_push (unsigned long mask)
14972 int num_dwarf_regs;
14976 int dwarf_par_index;
14979 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14980 if (mask & (1 << i))
14983 gcc_assert (num_regs && num_regs <= 16);
14985 /* We don't record the PC in the dwarf frame information. */
14986 num_dwarf_regs = num_regs;
14987 if (mask & (1 << PC_REGNUM))
14990 /* For the body of the insn we are going to generate an UNSPEC in
14991 parallel with several USEs. This allows the insn to be recognized
14992 by the push_multi pattern in the arm.md file.
14994 The body of the insn looks something like this:
14997 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14998 (const_int:SI <num>)))
14999 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15005 For the frame note however, we try to be more explicit and actually
15006 show each register being stored into the stack frame, plus a (single)
15007 decrement of the stack pointer. We do it this way in order to be
15008 friendly to the stack unwinding code, which only wants to see a single
15009 stack decrement per instruction. The RTL we generate for the note looks
15010 something like this:
15013 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15014 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15015 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15016 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15020 FIXME: In an ideal world the PRE_MODIFY would not exist and
15021 instead we'd have a parallel expression detailing all
15022 the stores to the various memory addresses so that debug
15023 information is more up-to-date. Remember however while writing
15024 this to take care of the constraints with the push instruction.
15026 Note also that this has to be taken care of for the VFP registers.
15028 For more see PR43399. */
15030 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15031 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15032 dwarf_par_index = 1;
15034 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15036 if (mask & (1 << i))
15038 reg = gen_rtx_REG (SImode, i);
15040 XVECEXP (par, 0, 0)
15041 = gen_rtx_SET (VOIDmode,
15044 gen_rtx_PRE_MODIFY (Pmode,
15047 (stack_pointer_rtx,
15050 gen_rtx_UNSPEC (BLKmode,
15051 gen_rtvec (1, reg),
15052 UNSPEC_PUSH_MULT));
15054 if (i != PC_REGNUM)
15056 tmp = gen_rtx_SET (VOIDmode,
15057 gen_frame_mem (SImode, stack_pointer_rtx),
15059 RTX_FRAME_RELATED_P (tmp) = 1;
15060 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15068 for (j = 1, i++; j < num_regs; i++)
15070 if (mask & (1 << i))
15072 reg = gen_rtx_REG (SImode, i);
15074 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15076 if (i != PC_REGNUM)
15079 = gen_rtx_SET (VOIDmode,
15082 plus_constant (stack_pointer_rtx,
15085 RTX_FRAME_RELATED_P (tmp) = 1;
15086 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15093 par = emit_insn (par);
15095 tmp = gen_rtx_SET (VOIDmode,
15097 plus_constant (stack_pointer_rtx, -4 * num_regs));
15098 RTX_FRAME_RELATED_P (tmp) = 1;
15099 XVECEXP (dwarf, 0, 0) = tmp;
15101 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15106 /* Calculate the size of the return value that is passed in registers. */
15108 arm_size_return_regs (void)
15110 enum machine_mode mode;
15112 if (crtl->return_rtx != 0)
15113 mode = GET_MODE (crtl->return_rtx);
15115 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15117 return GET_MODE_SIZE (mode);
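/* For example, a DImode return value yields 8 here, i.e. two core
   registers' worth.  */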
15121 emit_sfm (int base_reg, int count)
15128 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15129 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15131 reg = gen_rtx_REG (XFmode, base_reg++);
15133 XVECEXP (par, 0, 0)
15134 = gen_rtx_SET (VOIDmode,
15137 gen_rtx_PRE_MODIFY (Pmode,
15140 (stack_pointer_rtx,
15143 gen_rtx_UNSPEC (BLKmode,
15144 gen_rtvec (1, reg),
15145 UNSPEC_PUSH_MULT));
15146 tmp = gen_rtx_SET (VOIDmode,
15147 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15148 RTX_FRAME_RELATED_P (tmp) = 1;
15149 XVECEXP (dwarf, 0, 1) = tmp;
15151 for (i = 1; i < count; i++)
15153 reg = gen_rtx_REG (XFmode, base_reg++);
15154 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15156 tmp = gen_rtx_SET (VOIDmode,
15157 gen_frame_mem (XFmode,
15158 plus_constant (stack_pointer_rtx,
15161 RTX_FRAME_RELATED_P (tmp) = 1;
15162 XVECEXP (dwarf, 0, i + 1) = tmp;
15165 tmp = gen_rtx_SET (VOIDmode,
15167 plus_constant (stack_pointer_rtx, -12 * count));
15169 RTX_FRAME_RELATED_P (tmp) = 1;
15170 XVECEXP (dwarf, 0, 0) = tmp;
15172 par = emit_insn (par);
15173 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15179 /* Return true if the current function needs to save/restore LR. */
15182 thumb_force_lr_save (void)
15184 return !cfun->machine->lr_save_eliminated
15185 && (!leaf_function_p ()
15186 || thumb_far_jump_used_p ()
15187 || df_regs_ever_live_p (LR_REGNUM));
15191 /* Return true if r3 is used by any of the tail call insns in the
15192 current function. */
15195 any_sibcall_uses_r3 (void)
15200 if (!crtl->tail_call_emit)
15202 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15203 if (e->flags & EDGE_SIBCALL)
15205 rtx call = BB_END (e->src);
15206 if (!CALL_P (call))
15207 call = prev_nonnote_nondebug_insn (call);
15208 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15209 if (find_regno_fusage (call, USE, 3))
15216 /* Compute the distance from register FROM to register TO.
15217 These can be the arg pointer (26), the soft frame pointer (25),
15218 the stack pointer (13) or the hard frame pointer (11).
15219 In thumb mode r7 is used as the soft frame pointer, if needed.
15220 Typical stack layout looks like this:
old stack pointer ->       +-------------------------------+
                           | saved arguments for           |
                           | vararg functions              |
hard FP & arg pointer ->   +-------------------------------+
                           | stack frame                   |
                           +-------------------------------+
                           | call saved registers          |
soft frame pointer ->      +-------------------------------+
                           | local variables               |
locals base pointer ->     +-------------------------------+
                           | outgoing arguments            |
current stack pointer ->   +-------------------------------+
15250 For a given function some or all of these stack components
15251 may not be needed, giving rise to the possibility of
15252 eliminating some of the registers.
15254 The values returned by this function must reflect the behavior
15255 of arm_expand_prologue() and arm_compute_save_reg_mask().
15257 The sign of the number returned reflects the direction of stack
15258 growth, so the values are positive for all eliminations except
15259 from the soft frame pointer to the hard frame pointer.
15261 SFP may point just inside the local variables block to ensure correct alignment.  */
15265 /* Calculate stack offsets. These are used to calculate register elimination
15266 offsets and in prologue/epilogue code. Also calculates which registers
15267 should be saved. */
15269 static arm_stack_offsets *
15270 arm_get_frame_offsets (void)
15272 struct arm_stack_offsets *offsets;
15273 unsigned long func_type;
15277 HOST_WIDE_INT frame_size;
15280 offsets = &cfun->machine->stack_offsets;
15282 /* We need to know if we are a leaf function. Unfortunately, it
15283 is possible to be called after start_sequence has been called,
15284 which causes get_insns to return the insns for the sequence,
15285 not the function, which will cause leaf_function_p to return
15286 the incorrect result.
15288 However, we only need to know about leaf functions once reload has completed, and the
15289 frame size cannot be changed after that time, so we can safely
15290 use the cached value. */
15292 if (reload_completed)
15295 /* Initially this is the size of the local variables. It will be translated
15296 into an offset once we have determined the size of preceding data. */
15297 frame_size = ROUND_UP_WORD (get_frame_size ());
15299 leaf = leaf_function_p ();
15301 /* Space for variadic functions. */
15302 offsets->saved_args = crtl->args.pretend_args_size;
15304 /* In Thumb mode this is incorrect, but never used. */
15305 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15306 arm_compute_static_chain_stack_bytes();
15310 unsigned int regno;
15312 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15313 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15314 saved = core_saved;
15316 /* We know that SP will be doubleword aligned on entry, and we must
15317 preserve that condition at any subroutine call. We also require the
15318 soft frame pointer to be doubleword aligned. */
15320 if (TARGET_REALLY_IWMMXT)
15322 /* Check for the call-saved iWMMXt registers. */
15323 for (regno = FIRST_IWMMXT_REGNUM;
15324 regno <= LAST_IWMMXT_REGNUM;
15326 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15330 func_type = arm_current_func_type ();
15331 if (! IS_VOLATILE (func_type))
15333 /* Space for saved FPA registers. */
15334 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15335 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15338 /* Space for saved VFP registers. */
15339 if (TARGET_HARD_FLOAT && TARGET_VFP)
15340 saved += arm_get_vfp_saved_size ();
15343 else /* TARGET_THUMB1 */
15345 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15346 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15347 saved = core_saved;
15348 if (TARGET_BACKTRACE)
15352 /* Saved registers include the stack frame. */
15353 offsets->saved_regs = offsets->saved_args + saved +
15354 arm_compute_static_chain_stack_bytes();
15355 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15356 /* A leaf function does not need any stack alignment if it has nothing on the stack.  */
15358 if (leaf && frame_size == 0)
15360 offsets->outgoing_args = offsets->soft_frame;
15361 offsets->locals_base = offsets->soft_frame;
15365 /* Ensure SFP has the correct alignment. */
15366 if (ARM_DOUBLEWORD_ALIGN
15367 && (offsets->soft_frame & 7))
15369 offsets->soft_frame += 4;
15370 /* Try to align stack by pushing an extra reg. Don't bother doing this
15371 when there is a stack frame as the alignment will be rolled into
15372 the normal stack adjustment. */
15373 if (frame_size + crtl->outgoing_args_size == 0)
15377 /* If it is safe to use r3, then do so. This sometimes
15378 generates better code on Thumb-2 by avoiding the need to
15379 use 32-bit push/pop instructions. */
15380 if (! any_sibcall_uses_r3 ()
15381 && arm_size_return_regs () <= 12
15382 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15387 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15389 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15398 offsets->saved_regs += 4;
15399 offsets->saved_regs_mask |= (1 << reg);
15404 offsets->locals_base = offsets->soft_frame + frame_size;
15405 offsets->outgoing_args = (offsets->locals_base
15406 + crtl->outgoing_args_size);
15408 if (ARM_DOUBLEWORD_ALIGN)
15410 /* Ensure SP remains doubleword aligned. */
15411 if (offsets->outgoing_args & 7)
15412 offsets->outgoing_args += 4;
15413 gcc_assert (!(offsets->outgoing_args & 7));
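/* E.g. a running offset of 52 here is bumped to 56 so that SP stays
   doubleword aligned across calls, as required above.  */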
15420 /* Calculate the relative offsets for the different stack pointers. Positive
15421 offsets are in the direction of stack growth. */
15424 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15426 arm_stack_offsets *offsets;
15428 offsets = arm_get_frame_offsets ();
15430 /* OK, now we have enough information to compute the distances.
15431 There must be an entry in these switch tables for each pair
15432 of registers in ELIMINABLE_REGS, even if some of the entries
15433 seem to be redundant or useless. */
15436 case ARG_POINTER_REGNUM:
15439 case THUMB_HARD_FRAME_POINTER_REGNUM:
15442 case FRAME_POINTER_REGNUM:
15443 /* This is the reverse of the soft frame pointer
15444 to hard frame pointer elimination below. */
15445 return offsets->soft_frame - offsets->saved_args;
15447 case ARM_HARD_FRAME_POINTER_REGNUM:
15448 /* This is only non-zero in the case where the static chain register
15449 is stored above the frame. */
15450 return offsets->frame - offsets->saved_args - 4;
15452 case STACK_POINTER_REGNUM:
15453 /* If nothing has been pushed on the stack at all
15454 then this will return -4. This *is* correct! */
15455 return offsets->outgoing_args - (offsets->saved_args + 4);
15458 gcc_unreachable ();
15460 gcc_unreachable ();
15462 case FRAME_POINTER_REGNUM:
15465 case THUMB_HARD_FRAME_POINTER_REGNUM:
15468 case ARM_HARD_FRAME_POINTER_REGNUM:
15469 /* The hard frame pointer points to the top entry in the
15470 stack frame. The soft frame pointer points to the bottom entry
15471 in the stack frame. If there is no stack frame at all,
15472 then they are identical. */
15474 return offsets->frame - offsets->soft_frame;
15476 case STACK_POINTER_REGNUM:
15477 return offsets->outgoing_args - offsets->soft_frame;
15480 gcc_unreachable ();
15482 gcc_unreachable ();
15485 /* You cannot eliminate from the stack pointer.
15486 In theory you could eliminate from the hard frame
15487 pointer to the stack pointer, but this will never
15488 happen, since if a stack frame is not needed the
15489 hard frame pointer will never be used. */
15490 gcc_unreachable ();
15494 /* Given FROM and TO register numbers, say whether this elimination is
15495 allowed. Frame pointer elimination is automatically handled.
15497 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15498 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15499 pointer, we must eliminate FRAME_POINTER_REGNUM into
15500 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15501 ARG_POINTER_REGNUM. */
15504 arm_can_eliminate (const int from, const int to)
15506 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15507 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15508 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15509 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15513 /* Emit RTL to save coprocessor registers on function entry. Returns the
15514 number of bytes pushed. */
15517 arm_save_coproc_regs(void)
15519 int saved_size = 0;
15521 unsigned start_reg;
15524 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15525 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15527 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15528 insn = gen_rtx_MEM (V2SImode, insn);
15529 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15530 RTX_FRAME_RELATED_P (insn) = 1;
15534 /* Save any floating point call-saved registers used by this function.  */
15536 if (TARGET_FPA_EMU2)
15538 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15539 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15541 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15542 insn = gen_rtx_MEM (XFmode, insn);
15543 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15544 RTX_FRAME_RELATED_P (insn) = 1;
15550 start_reg = LAST_FPA_REGNUM;
15552 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15554 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15556 if (start_reg - reg == 3)
15558 insn = emit_sfm (reg, 4);
15559 RTX_FRAME_RELATED_P (insn) = 1;
15561 start_reg = reg - 1;
15566 if (start_reg != reg)
15568 insn = emit_sfm (reg + 1, start_reg - reg);
15569 RTX_FRAME_RELATED_P (insn) = 1;
15570 saved_size += (start_reg - reg) * 12;
15572 start_reg = reg - 1;
15576 if (start_reg != reg)
15578 insn = emit_sfm (reg + 1, start_reg - reg);
15579 saved_size += (start_reg - reg) * 12;
15580 RTX_FRAME_RELATED_P (insn) = 1;
15583 if (TARGET_HARD_FLOAT && TARGET_VFP)
15585 start_reg = FIRST_VFP_REGNUM;
15587 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15589 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15590 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15592 if (start_reg != reg)
15593 saved_size += vfp_emit_fstmd (start_reg,
15594 (reg - start_reg) / 2);
15595 start_reg = reg + 2;
15598 if (start_reg != reg)
15599 saved_size += vfp_emit_fstmd (start_reg,
15600 (reg - start_reg) / 2);
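/* The grouping mirrors the epilogue: for example (registers chosen
   for illustration), with d8 and d9 live but d10 dead and d11 live,
   one fstmd saves the d8-d9 block and a second saves d11 alone.  */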
15606 /* Set the Thumb frame pointer from the stack pointer. */
15609 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15611 HOST_WIDE_INT amount;
15614 amount = offsets->outgoing_args - offsets->locals_base;
15616 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15617 stack_pointer_rtx, GEN_INT (amount)));
15620 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15621 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15622 expects the first two operands to be the same. */
15625 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15627 hard_frame_pointer_rtx));
15631 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15632 hard_frame_pointer_rtx,
15633 stack_pointer_rtx));
15635 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15636 plus_constant (stack_pointer_rtx, amount));
15637 RTX_FRAME_RELATED_P (dwarf) = 1;
15638 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15641 RTX_FRAME_RELATED_P (insn) = 1;
15644 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15647 arm_expand_prologue (void)
15652 unsigned long live_regs_mask;
15653 unsigned long func_type;
15655 int saved_pretend_args = 0;
15656 int saved_regs = 0;
15657 unsigned HOST_WIDE_INT args_to_push;
15658 arm_stack_offsets *offsets;
15660 func_type = arm_current_func_type ();
15662 /* Naked functions don't have prologues. */
15663 if (IS_NAKED (func_type))
15666 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15667 args_to_push = crtl->args.pretend_args_size;
15669 /* Compute which register we will have to save onto the stack. */
15670 offsets = arm_get_frame_offsets ();
15671 live_regs_mask = offsets->saved_regs_mask;
15673 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15675 if (IS_STACKALIGN (func_type))
15680 /* Handle a word-aligned stack pointer. We generate the following:

	mov	r0, sp
	bic	r1, r0, #7
	mov	sp, r1
	<save and restore r0 in normal prologue/epilogue>
	mov	sp, r0
15689 The unwinder doesn't need to know about the stack realignment.
15690 Just tell it we saved SP in r0. */
15691 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15693 r0 = gen_rtx_REG (SImode, 0);
15694 r1 = gen_rtx_REG (SImode, 1);
15695 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15696 compiler won't choke. */
15697 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15698 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15699 insn = gen_movsi (r0, stack_pointer_rtx);
15700 RTX_FRAME_RELATED_P (insn) = 1;
15701 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15703 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15704 emit_insn (gen_movsi (stack_pointer_rtx, r1));
  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
15710 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15712 if (IS_INTERRUPT (func_type))
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer, however, corrupts the IP
	     register, so we must push it first.  */
15717 insn = emit_multi_reg_push (1 << IP_REGNUM);
15719 /* Do not set RTX_FRAME_RELATED_P on this insn.
15720 The dwarf stack unwinding code only wants to see one
15721 stack decrement per function, and this is not it. If
15722 this instruction is labeled as being part of the frame
15723 creation sequence then dwarf2out_frame_debug_expr will
15724 die when it encounters the assignment of IP to FP
15725 later on, since the use of SP here establishes SP as
15726 the CFA register and not IP.
15728 Anyway this instruction is not really part of the stack
15729 frame creation although it is part of the prologue. */
15731 else if (IS_NESTED (func_type))
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.
15745 Note - we only need to tell the dwarf2 backend about the SP
15746 adjustment in the second variant; the static chain register
15747 doesn't need to be unwound, as it doesn't contain a value
15748 inherited from the caller. */
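	  /* As an illustration, the second variant below amounts to a
	     single pre-decrement store (sketch):

		 str	ip, [sp, #-4]!	@ park IP just above the frame

	     which is why arm_compute_static_chain_stack_bytes () is
	     asserted to be exactly 4 in that case.  */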
15750 if (df_regs_ever_live_p (3) == false)
15751 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15752 else if (args_to_push == 0)
15756 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15759 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15760 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15763 /* Just tell the dwarf backend that we adjusted SP. */
15764 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15765 plus_constant (stack_pointer_rtx,
15767 RTX_FRAME_RELATED_P (insn) = 1;
15768 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15772 /* Store the args on the stack. */
15773 if (cfun->machine->uses_anonymous_args)
15774 insn = emit_multi_reg_push
15775 ((0xf0 >> (args_to_push / 4)) & 0xf);
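	      /* Worked example of the mask computation above: with 8 bytes
		 of pretend args, 0xf0 >> (8 / 4) is 0x3c, and masking with
		 0xf leaves 0xc, i.e. push {r2, r3} -- the argument
		 registers holding the anonymous arguments.  */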
15778 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15779 GEN_INT (- args_to_push)));
15781 RTX_FRAME_RELATED_P (insn) = 1;
15783 saved_pretend_args = 1;
15784 fp_offset = args_to_push;
15787 /* Now reuse r3 to preserve IP. */
15788 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15792 insn = emit_set_insn (ip_rtx,
15793 plus_constant (stack_pointer_rtx, fp_offset));
15794 RTX_FRAME_RELATED_P (insn) = 1;
15799 /* Push the argument registers, or reserve space for them. */
15800 if (cfun->machine->uses_anonymous_args)
15801 insn = emit_multi_reg_push
15802 ((0xf0 >> (args_to_push / 4)) & 0xf);
15805 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15806 GEN_INT (- args_to_push)));
15807 RTX_FRAME_RELATED_P (insn) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is
     APCS), then subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
15815 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15816 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15817 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15820 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15822 emit_set_insn (lr, plus_constant (lr, -4));
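      /* Sketch of the pay-off: with LR pre-adjusted here, the epilogue
	 can return with a single

	     ldmfd	sp!, {..., pc}^

	 instead of reloading LR and then executing "subs pc, lr, #4".
	 (The exact register list depends on the frame.)  */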
15825 if (live_regs_mask)
15827 saved_regs += bit_count (live_regs_mask) * 4;
15828 if (optimize_size && !frame_pointer_needed
15829 && saved_regs == offsets->saved_regs - offsets->saved_args)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
15838 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15840 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15841 if (frame && n * 4 >= frame)
15844 live_regs_mask |= (1 << n) - 1;
15845 saved_regs += frame;
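	  /* Worked example: if 8 more bytes of frame are needed and
	     r0/r1 are dead, setting their bits in live_regs_mask lets the
	     single push below allocate that space as a side effect,
	     saving a separate "sub sp, sp, #8".  */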
15848 insn = emit_multi_reg_push (live_regs_mask);
15849 RTX_FRAME_RELATED_P (insn) = 1;
15852 if (! IS_VOLATILE (func_type))
15853 saved_regs += arm_save_coproc_regs ();
15855 if (frame_pointer_needed && TARGET_ARM)
15857 /* Create the new frame pointer. */
15858 if (TARGET_APCS_FRAME)
15860 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15861 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15862 RTX_FRAME_RELATED_P (insn) = 1;
15864 if (IS_NESTED (func_type))
15866 /* Recover the static chain register. */
15867 if (!df_regs_ever_live_p (3)
15868 || saved_pretend_args)
15869 insn = gen_rtx_REG (SImode, 3);
15870 else /* if (crtl->args.pretend_args_size == 0) */
15872 insn = plus_constant (hard_frame_pointer_rtx, 4);
15873 insn = gen_frame_mem (SImode, insn);
15875 emit_set_insn (ip_rtx, insn);
15876 /* Add a USE to stop propagate_one_insn() from barfing. */
15877 emit_insn (gen_prologue_use (ip_rtx));
15882 insn = GEN_INT (saved_regs - 4);
15883 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15884 stack_pointer_rtx, insn));
15885 RTX_FRAME_RELATED_P (insn) = 1;
15889 if (flag_stack_usage)
15890 current_function_static_stack_size
15891 = offsets->outgoing_args - offsets->saved_args;
15893 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15895 /* This add can produce multiple insns for a large constant, so we
15896 need to get tricky. */
15897 rtx last = get_last_insn ();
15899 amount = GEN_INT (offsets->saved_args + saved_regs
15900 - offsets->outgoing_args);
15902 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15906 last = last ? NEXT_INSN (last) : get_insns ();
15907 RTX_FRAME_RELATED_P (last) = 1;
15909 while (last != insn);
15911 /* If the frame pointer is needed, emit a special barrier that
15912 will prevent the scheduler from moving stores to the frame
15913 before the stack adjustment. */
15914 if (frame_pointer_needed)
15915 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15916 hard_frame_pointer_rtx));
15920 if (frame_pointer_needed && TARGET_THUMB2)
15921 thumb_set_frame_pointer (offsets);
15923 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15925 unsigned long mask;
15927 mask = live_regs_mask;
15928 mask &= THUMB2_WORK_REGS;
15929 if (!IS_NESTED (func_type))
15930 mask |= (1 << IP_REGNUM);
15931 arm_load_pic_register (mask);
15934 /* If we are profiling, make sure no instructions are scheduled before
15935 the call to mcount. Similarly if the user has requested no
15936 scheduling in the prolog. Similarly if we want non-call exceptions
15937 using the EABI unwinder, to prevent faulting instructions from being
15938 swapped with a stack adjustment. */
15939 if (crtl->profile || !TARGET_SCHED_PROLOG
15940 || (arm_except_unwind_info (&global_options) == UI_TARGET
15941 && cfun->can_throw_non_call_exceptions))
15942 emit_insn (gen_blockage ());
15944 /* If the link register is being kept alive, with the return address in it,
15945 then make sure that it does not get reused by the ce2 pass. */
15946 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15947 cfun->machine->lr_save_eliminated = 1;
15950 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15952 arm_print_condition (FILE *stream)
15954 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15956 /* Branch conversion is not implemented for Thumb-2. */
15959 output_operand_lossage ("predicated Thumb instruction");
15962 if (current_insn_predicate != NULL)
15964 output_operand_lossage
15965 ("predicated instruction in conditional sequence");
15969 fputs (arm_condition_codes[arm_current_cc], stream);
15971 else if (current_insn_predicate)
15973 enum arm_cond_code code;
15977 output_operand_lossage ("predicated Thumb instruction");
15981 code = get_arm_condition_code (current_insn_predicate);
15982 fputs (arm_condition_codes[code], stream);
/* If CODE is 'd', then X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
15991 of the comparison is CCFPEmode, then always execute the instruction -- we
15992 do this because in these circumstances !GE does not necessarily imply LT;
15993 in these cases the instruction pattern will take care to make sure that
15994 an instruction containing %d will follow, thereby undoing the effects of
15995 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
15998 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15999 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
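/* A couple of worked examples for the codes handled below (sketches):
   %B applied to (const_int 0) prints -1, the bitwise inverse, and
   %M applied to r0 in DImode prints {r0-r1}, an ldm/stm register list.  */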
16001 arm_print_operand (FILE *stream, rtx x, int code)
16006 fputs (ASM_COMMENT_START, stream);
16010 fputs (user_label_prefix, stream);
16014 fputs (REGISTER_PREFIX, stream);
16018 arm_print_condition (stream);
16022 /* Nothing in unified syntax, otherwise the current condition code. */
16023 if (!TARGET_UNIFIED_ASM)
16024 arm_print_condition (stream);
16028 /* The current condition code in unified syntax, otherwise nothing. */
16029 if (TARGET_UNIFIED_ASM)
16030 arm_print_condition (stream);
16034 /* The current condition code for a condition code setting instruction.
16035 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16036 if (TARGET_UNIFIED_ASM)
16038 fputc('s', stream);
16039 arm_print_condition (stream);
16043 arm_print_condition (stream);
16044 fputc('s', stream);
16049 /* If the instruction is conditionally executed then print
16050 the current condition code, otherwise print 's'. */
16051 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16052 if (current_insn_predicate)
16053 arm_print_condition (stream);
16055 fputc('s', stream);
    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
16068 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16069 r = real_value_negate (&r);
16070 fprintf (stream, "%s", fp_const_from_val (&r));
16074 /* An integer or symbol address without a preceding # sign. */
16076 switch (GET_CODE (x))
16079 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16083 output_addr_const (stream, x);
16087 gcc_unreachable ();
16092 if (GET_CODE (x) == CONST_INT)
16095 val = ARM_SIGN_EXTEND (~INTVAL (x));
16096 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16100 putc ('~', stream);
16101 output_addr_const (stream, x);
16106 /* The low 16 bits of an immediate constant. */
16107 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16111 fprintf (stream, "%s", arithmetic_instr (x, 1));
16114 /* Truncate Cirrus shift counts. */
16116 if (GET_CODE (x) == CONST_INT)
16118 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16121 arm_print_operand (stream, x, 0);
16125 fprintf (stream, "%s", arithmetic_instr (x, 0));
16133 if (!shift_operator (x, SImode))
16135 output_operand_lossage ("invalid shift operand");
16139 shift = shift_op (x, &val);
16143 fprintf (stream, ", %s ", shift);
16145 arm_print_operand (stream, XEXP (x, 1), 0);
16147 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16152 /* An explanation of the 'Q', 'R' and 'H' register operands:
16154 In a pair of registers containing a DI or DF value the 'Q'
16155 operand returns the register number of the register containing
16156 the least significant part of the value. The 'R' operand returns
16157 the register number of the register containing the most
16158 significant part of the value.
16160 The 'H' operand returns the higher of the two register numbers.
       On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
16162 same as the 'Q' operand, since the most significant part of the
16163 value is held in the lower number register. The reverse is true
16164 on systems where WORDS_BIG_ENDIAN is false.
16166 The purpose of these operands is to distinguish between cases
16167 where the endian-ness of the values is important (for example
16168 when they are added together), and cases where the endian-ness
16169 is irrelevant, but the order of register operations is important.
16170 For example when loading a value from memory into a register
16171 pair, the endian-ness does not matter. Provided that the value
16172 from the lower memory address is put into the lower numbered
16173 register, and the value from the higher address is put into the
16174 higher numbered register, the load will work regardless of whether
16175 the value being loaded is big-wordian or little-wordian. The
16176 order of the two register loads can matter however, if the address
16177 of the memory location is actually held in one of the registers
16178 being overwritten by the load.
       The 'Q' and 'R' constraints are also available for 64-bit
       constants.  */
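    /* Worked example (little-endian sketch): for a DImode value held in
       {r0, r1}, %Q prints r0 (least significant part), %R prints r1
       (most significant part), and %H prints r1, the higher-numbered
       register.  */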
16183 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16185 rtx part = gen_lowpart (SImode, x);
16186 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16190 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16192 output_operand_lossage ("invalid operand for code '%c'", code);
16196 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16200 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16202 enum machine_mode mode = GET_MODE (x);
16205 if (mode == VOIDmode)
16207 part = gen_highpart_mode (SImode, mode, x);
16208 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16212 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16214 output_operand_lossage ("invalid operand for code '%c'", code);
16218 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16222 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16224 output_operand_lossage ("invalid operand for code '%c'", code);
16228 asm_fprintf (stream, "%r", REGNO (x) + 1);
16232 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16234 output_operand_lossage ("invalid operand for code '%c'", code);
16238 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16242 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16244 output_operand_lossage ("invalid operand for code '%c'", code);
16248 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16252 asm_fprintf (stream, "%r",
16253 GET_CODE (XEXP (x, 0)) == REG
16254 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16258 asm_fprintf (stream, "{%r-%r}",
16260 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
16267 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16268 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16270 asm_fprintf (stream, "{d%d}", regno);
16272 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16277 /* CONST_TRUE_RTX means always -- that's the default. */
16278 if (x == const_true_rtx)
16281 if (!COMPARISON_P (x))
16283 output_operand_lossage ("invalid operand for code '%c'", code);
16287 fputs (arm_condition_codes[get_arm_condition_code (x)],
16292 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16293 want to do that. */
16294 if (x == const_true_rtx)
16296 output_operand_lossage ("instruction never executed");
16299 if (!COMPARISON_P (x))
16301 output_operand_lossage ("invalid operand for code '%c'", code);
16305 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16306 (get_arm_condition_code (x))],
    /* Cirrus registers can be accessed in a variety of ways:
	 single floating point (f)
	 double floating point (d)
	 32bit integer (fx)
	 64bit integer (dx).  */
16315 case 'W': /* Cirrus register in F mode. */
16316 case 'X': /* Cirrus register in D mode. */
16317 case 'Y': /* Cirrus register in FX mode. */
16318 case 'Z': /* Cirrus register in DX mode. */
16319 gcc_assert (GET_CODE (x) == REG
16320 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16322 fprintf (stream, "mv%s%s",
16324 : code == 'X' ? "d"
16325 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
    /* Print a Cirrus register, selecting the access mode from the
       register's machine mode.  */
16332 int mode = GET_MODE (x);
16334 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16336 output_operand_lossage ("invalid operand for code '%c'", code);
16340 fprintf (stream, "mv%s%s",
16341 mode == DFmode ? "d"
16342 : mode == SImode ? "fx"
16343 : mode == DImode ? "dx"
16344 : "f", reg_names[REGNO (x)] + 2);
16350 if (GET_CODE (x) != REG
16351 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16352 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16353 /* Bad value for wCG register number. */
16355 output_operand_lossage ("invalid operand for code '%c'", code);
16360 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16363 /* Print an iWMMXt control register name. */
16365 if (GET_CODE (x) != CONST_INT
16367 || INTVAL (x) >= 16)
16368 /* Bad value for wC register number. */
16370 output_operand_lossage ("invalid operand for code '%c'", code);
16376 static const char * wc_reg_names [16] =
16378 "wCID", "wCon", "wCSSF", "wCASF",
16379 "wC4", "wC5", "wC6", "wC7",
16380 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16381 "wC12", "wC13", "wC14", "wC15"
16384 fprintf (stream, wc_reg_names [INTVAL (x)]);
    /* Print the high single-precision register of a VFP double-precision
       register.  */
16392 int mode = GET_MODE (x);
16395 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16397 output_operand_lossage ("invalid operand for code '%c'", code);
16402 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16404 output_operand_lossage ("invalid operand for code '%c'", code);
16408 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16412 /* Print a VFP/Neon double precision or quad precision register name. */
16416 int mode = GET_MODE (x);
16417 int is_quad = (code == 'q');
16420 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16422 output_operand_lossage ("invalid operand for code '%c'", code);
16426 if (GET_CODE (x) != REG
16427 || !IS_VFP_REGNUM (REGNO (x)))
16429 output_operand_lossage ("invalid operand for code '%c'", code);
16434 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16435 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16437 output_operand_lossage ("invalid operand for code '%c'", code);
16441 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16442 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16446 /* These two codes print the low/high doubleword register of a Neon quad
16447 register, respectively. For pair-structure types, can also print
16448 low/high quadword registers. */
16452 int mode = GET_MODE (x);
16455 if ((GET_MODE_SIZE (mode) != 16
16456 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16458 output_operand_lossage ("invalid operand for code '%c'", code);
16463 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16465 output_operand_lossage ("invalid operand for code '%c'", code);
16469 if (GET_MODE_SIZE (mode) == 16)
16470 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16471 + (code == 'f' ? 1 : 0));
16473 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16474 + (code == 'f' ? 1 : 0));
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
16482 int index = vfp3_const_double_index (x);
16483 gcc_assert (index != -1);
16484 fprintf (stream, "%d", index);
16488 /* Print bits representing opcode features for Neon.
16490 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16491 and polynomials as unsigned.
16493 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16495 Bit 2 is 1 for rounding functions, 0 otherwise. */
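    /* Worked example of the encoding: a bits value of 3 (signed float)
       prints 'f' under all of 'T', 'F' and 't'; a value of 2 (polynomial)
       prints 'p' under 'T' and 'F' but 'u' under 't'.  */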
16497 /* Identify the type as 's', 'u', 'p' or 'f'. */
16500 HOST_WIDE_INT bits = INTVAL (x);
16501 fputc ("uspf"[bits & 3], stream);
16505 /* Likewise, but signed and unsigned integers are both 'i'. */
16508 HOST_WIDE_INT bits = INTVAL (x);
16509 fputc ("iipf"[bits & 3], stream);
16513 /* As for 'T', but emit 'u' instead of 'p'. */
16516 HOST_WIDE_INT bits = INTVAL (x);
16517 fputc ("usuf"[bits & 3], stream);
16521 /* Bit 2: rounding (vs none). */
16524 HOST_WIDE_INT bits = INTVAL (x);
16525 fputs ((bits & 4) != 0 ? "r" : "", stream);
16529 /* Memory operand for vld1/vst1 instruction. */
16533 bool postinc = FALSE;
16534 unsigned align, modesize, align_bits;
16536 gcc_assert (GET_CODE (x) == MEM);
16537 addr = XEXP (x, 0);
16538 if (GET_CODE (addr) == POST_INC)
16541 addr = XEXP (addr, 0);
16543 asm_fprintf (stream, "[%r", REGNO (addr));
	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
16548 align = MEM_ALIGN (x) >> 3;
16549 modesize = GET_MODE_SIZE (GET_MODE (x));
16551 /* Only certain alignment specifiers are supported by the hardware. */
16552 if (modesize == 16 && (align % 32) == 0)
16554 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16556 else if ((align % 8) == 0)
16561 if (align_bits != 0)
16562 asm_fprintf (stream, ":%d", align_bits);
16564 asm_fprintf (stream, "]");
16567 fputs("!", stream);
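	/* For example (sketch, base register r0 assumed): a 16-byte access
	   known to be 256-bit aligned and post-incremented prints as
	   "[r0:256]!", the strongest hint the hardware accepts.  */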
16575 gcc_assert (GET_CODE (x) == MEM);
16576 addr = XEXP (x, 0);
16577 gcc_assert (GET_CODE (addr) == REG);
16578 asm_fprintf (stream, "[%r]", REGNO (addr));
16582 /* Translate an S register number into a D register number and element index. */
16585 int mode = GET_MODE (x);
16588 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16590 output_operand_lossage ("invalid operand for code '%c'", code);
16595 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16597 output_operand_lossage ("invalid operand for code '%c'", code);
16601 regno = regno - FIRST_VFP_REGNUM;
16602 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16606 /* Register specifier for vld1.16/vst1.16. Translate the S register
16607 number into a D register number and element index. */
16610 int mode = GET_MODE (x);
16613 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16615 output_operand_lossage ("invalid operand for code '%c'", code);
16620 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16622 output_operand_lossage ("invalid operand for code '%c'", code);
16626 regno = regno - FIRST_VFP_REGNUM;
16627 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
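	/* Worked example: s1 is the high half of d0, so 'y' prints it as
	   d0[1], while 'z' (16-bit elements) prints it as d0[2], the
	   halfword index within the doubleword.  */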
16634 output_operand_lossage ("missing operand");
16638 switch (GET_CODE (x))
16641 asm_fprintf (stream, "%r", REGNO (x));
16645 output_memory_reference_mode = GET_MODE (x);
16646 output_address (XEXP (x, 0));
16653 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16654 sizeof (fpstr), 0, 1);
16655 fprintf (stream, "#%s", fpstr);
16658 fprintf (stream, "#%s", fp_immediate_constant (x));
16662 gcc_assert (GET_CODE (x) != NEG);
16663 fputc ('#', stream);
16664 if (GET_CODE (x) == HIGH)
16666 fputs (":lower16:", stream);
16670 output_addr_const (stream, x);
16676 /* Target hook for printing a memory address. */
16678 arm_print_operand_address (FILE *stream, rtx x)
16682 int is_minus = GET_CODE (x) == MINUS;
16684 if (GET_CODE (x) == REG)
16685 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16686 else if (GET_CODE (x) == PLUS || is_minus)
16688 rtx base = XEXP (x, 0);
16689 rtx index = XEXP (x, 1);
16690 HOST_WIDE_INT offset = 0;
16691 if (GET_CODE (base) != REG
16692 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
	  /* Ensure that BASE is a register (one of them must be), and that
	     SP is not used as an index register.  */
16701 switch (GET_CODE (index))
16704 offset = INTVAL (index);
16707 asm_fprintf (stream, "[%r, #%wd]",
16708 REGNO (base), offset);
16712 asm_fprintf (stream, "[%r, %s%r]",
16713 REGNO (base), is_minus ? "-" : "",
16723 asm_fprintf (stream, "[%r, %s%r",
16724 REGNO (base), is_minus ? "-" : "",
16725 REGNO (XEXP (index, 0)));
16726 arm_print_operand (stream, index, 'S');
16727 fputs ("]", stream);
16732 gcc_unreachable ();
16735 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16736 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16738 extern enum machine_mode output_memory_reference_mode;
16740 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16742 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16743 asm_fprintf (stream, "[%r, #%s%d]!",
16744 REGNO (XEXP (x, 0)),
16745 GET_CODE (x) == PRE_DEC ? "-" : "",
16746 GET_MODE_SIZE (output_memory_reference_mode));
16748 asm_fprintf (stream, "[%r], #%s%d",
16749 REGNO (XEXP (x, 0)),
16750 GET_CODE (x) == POST_DEC ? "-" : "",
16751 GET_MODE_SIZE (output_memory_reference_mode));
16753 else if (GET_CODE (x) == PRE_MODIFY)
16755 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16756 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16757 asm_fprintf (stream, "#%wd]!",
16758 INTVAL (XEXP (XEXP (x, 1), 1)));
16760 asm_fprintf (stream, "%r]!",
16761 REGNO (XEXP (XEXP (x, 1), 1)));
16763 else if (GET_CODE (x) == POST_MODIFY)
16765 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16766 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16767 asm_fprintf (stream, "#%wd",
16768 INTVAL (XEXP (XEXP (x, 1), 1)));
16770 asm_fprintf (stream, "%r",
16771 REGNO (XEXP (XEXP (x, 1), 1)));
16773 else output_addr_const (stream, x);
16777 if (GET_CODE (x) == REG)
16778 asm_fprintf (stream, "[%r]", REGNO (x));
16779 else if (GET_CODE (x) == POST_INC)
16780 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16781 else if (GET_CODE (x) == PLUS)
16783 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16784 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16785 asm_fprintf (stream, "[%r, #%wd]",
16786 REGNO (XEXP (x, 0)),
16787 INTVAL (XEXP (x, 1)));
16789 asm_fprintf (stream, "[%r, %r]",
16790 REGNO (XEXP (x, 0)),
16791 REGNO (XEXP (x, 1)));
16794 output_addr_const (stream, x);
16798 /* Target hook for indicating whether a punctuation character for
16799 TARGET_PRINT_OPERAND is valid. */
16801 arm_print_operand_punct_valid_p (unsigned char code)
16803 return (code == '@' || code == '|' || code == '.'
16804 || code == '(' || code == ')' || code == '#'
16805 || (TARGET_32BIT && (code == '?'))
16806 || (TARGET_THUMB2 && (code == '!'))
16807 || (TARGET_THUMB && (code == '_')));
16810 /* Target hook for assembling integer objects. The ARM version needs to
16811 handle word-sized values specially. */
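/* For example (sketch), a word-sized SYMBOL_REF in a PIC constant table
   is emitted as

	.word	some_symbol(GOT)

   for global symbols, or with (GOTOFF) when the symbol is known to be
   local.  some_symbol stands in for the actual name.  */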
16813 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16815 enum machine_mode mode;
16817 if (size == UNITS_PER_WORD && aligned_p)
16819 fputs ("\t.word\t", asm_out_file);
16820 output_addr_const (asm_out_file, x);
16822 /* Mark symbols as position independent. We only do this in the
16823 .text segment, not in the .data segment. */
16824 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16825 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16827 /* See legitimize_pic_address for an explanation of the
16828 TARGET_VXWORKS_RTP check. */
16829 if (TARGET_VXWORKS_RTP
16830 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16831 fputs ("(GOT)", asm_out_file);
16833 fputs ("(GOTOFF)", asm_out_file);
16835 fputc ('\n', asm_out_file);
16839 mode = GET_MODE (x);
16841 if (arm_vector_mode_supported_p (mode))
16845 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16847 units = CONST_VECTOR_NUNITS (x);
16848 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16850 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16851 for (i = 0; i < units; i++)
16853 rtx elt = CONST_VECTOR_ELT (x, i);
16855 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16858 for (i = 0; i < units; i++)
16860 rtx elt = CONST_VECTOR_ELT (x, i);
16861 REAL_VALUE_TYPE rval;
16863 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16866 (rval, GET_MODE_INNER (mode),
16867 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16873 return default_assemble_integer (x, size, aligned_p);
16877 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16881 if (!TARGET_AAPCS_BASED)
16884 default_named_section_asm_out_constructor
16885 : default_named_section_asm_out_destructor) (symbol, priority);
16889 /* Put these in the .init_array section, using a special relocation. */
16890 if (priority != DEFAULT_INIT_PRIORITY)
16893 sprintf (buf, "%s.%.5u",
16894 is_ctor ? ".init_array" : ".fini_array",
16896 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16903 switch_to_section (s);
16904 assemble_align (POINTER_SIZE);
16905 fputs ("\t.word\t", asm_out_file);
16906 output_addr_const (asm_out_file, symbol);
16907 fputs ("(target1)\n", asm_out_file);
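/* For example (sketch), a constructor registered at priority 100 on an
   AAPCS target ends up roughly as:

	.section	.init_array.00100
	.word	my_ctor(target1)

   where (target1) requests the R_ARM_TARGET1 relocation the EABI runtime
   expects; my_ctor stands in for the actual symbol.  */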
16910 /* Add a function to the list of static constructors. */
16913 arm_elf_asm_constructor (rtx symbol, int priority)
16915 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16918 /* Add a function to the list of static destructors. */
16921 arm_elf_asm_destructor (rtx symbol, int priority)
16923 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decreases execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
16932 0: normal, do nothing special
16933 1: make ASM_OUTPUT_OPCODE not output this instruction
16934 2: make ASM_OUTPUT_OPCODE not output this instruction
16935 3: make instructions conditional
16936 4: make instructions conditional
16938 State transitions (state->state by whom under condition):
16939 0 -> 1 final_prescan_insn if the `target' is a label
16940 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16941 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16942 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16943 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16944 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16945 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16946 (the target insn is arm_target_insn).
16948 If the jump clobbers the conditions then we use states 2 and 4.
16950 A similar thing can be done with conditional return insns.
16952 XXX In case the `target' is an unconditional branch, this conditionalising
16953 of the instructions always reduces code size, but not always execution
16954 time. But then, I want to reduce the code size to somewhere near what
16955 /bin/cc produces. */
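/* A classic example of the transformation this fsm enables (sketch):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		==>	addne	r1, r1, #1
	add	r1, r1, #1
   .L1:

   The branch is deleted and the skipped insn executes conditionally.  */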
16957 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16958 instructions. When a COND_EXEC instruction is seen the subsequent
16959 instructions are scanned so that multiple conditional instructions can be
16960 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16961 specify the length and true/false mask for the IT block. These will be
16962 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
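/* For example (sketch): three consecutive COND_EXEC insns on EQ, EQ and
   NE can share one IT block,

	itte	eq
	moveq	r0, #1
	moveq	r1, #2
	movne	r0, #0

   with arm_condexec_mask encoding the t/e pattern shown.  */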
16964 /* Returns the index of the ARM condition code string in
16965 `arm_condition_codes'. COMPARISON should be an rtx like
16966 `(eq (...) (...))'. */
16967 static enum arm_cond_code
16968 get_arm_condition_code (rtx comparison)
16970 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16971 enum arm_cond_code code;
16972 enum rtx_code comp_code = GET_CODE (comparison);
16974 if (GET_MODE_CLASS (mode) != MODE_CC)
16975 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16976 XEXP (comparison, 1));
16980 case CC_DNEmode: code = ARM_NE; goto dominance;
16981 case CC_DEQmode: code = ARM_EQ; goto dominance;
16982 case CC_DGEmode: code = ARM_GE; goto dominance;
16983 case CC_DGTmode: code = ARM_GT; goto dominance;
16984 case CC_DLEmode: code = ARM_LE; goto dominance;
16985 case CC_DLTmode: code = ARM_LT; goto dominance;
16986 case CC_DGEUmode: code = ARM_CS; goto dominance;
16987 case CC_DGTUmode: code = ARM_HI; goto dominance;
16988 case CC_DLEUmode: code = ARM_LS; goto dominance;
16989 case CC_DLTUmode: code = ARM_CC;
16992 gcc_assert (comp_code == EQ || comp_code == NE);
16994 if (comp_code == EQ)
16995 return ARM_INVERSE_CONDITION_CODE (code);
17001 case NE: return ARM_NE;
17002 case EQ: return ARM_EQ;
17003 case GE: return ARM_PL;
17004 case LT: return ARM_MI;
17005 default: gcc_unreachable ();
17011 case NE: return ARM_NE;
17012 case EQ: return ARM_EQ;
17013 default: gcc_unreachable ();
17019 case NE: return ARM_MI;
17020 case EQ: return ARM_PL;
17021 default: gcc_unreachable ();
      /* These encodings assume that AC=1 in the FPA system control
	 byte.  This allows us to handle all cases except UNEQ and
	 LTGT.  */
17031 case GE: return ARM_GE;
17032 case GT: return ARM_GT;
17033 case LE: return ARM_LS;
17034 case LT: return ARM_MI;
17035 case NE: return ARM_NE;
17036 case EQ: return ARM_EQ;
17037 case ORDERED: return ARM_VC;
17038 case UNORDERED: return ARM_VS;
17039 case UNLT: return ARM_LT;
17040 case UNLE: return ARM_LE;
17041 case UNGT: return ARM_HI;
17042 case UNGE: return ARM_PL;
17043 /* UNEQ and LTGT do not have a representation. */
17044 case UNEQ: /* Fall through. */
17045 case LTGT: /* Fall through. */
17046 default: gcc_unreachable ();
17052 case NE: return ARM_NE;
17053 case EQ: return ARM_EQ;
17054 case GE: return ARM_LE;
17055 case GT: return ARM_LT;
17056 case LE: return ARM_GE;
17057 case LT: return ARM_GT;
17058 case GEU: return ARM_LS;
17059 case GTU: return ARM_CC;
17060 case LEU: return ARM_CS;
17061 case LTU: return ARM_HI;
17062 default: gcc_unreachable ();
17068 case LTU: return ARM_CS;
17069 case GEU: return ARM_CC;
17070 default: gcc_unreachable ();
17076 case NE: return ARM_NE;
17077 case EQ: return ARM_EQ;
17078 case GEU: return ARM_CS;
17079 case GTU: return ARM_HI;
17080 case LEU: return ARM_LS;
17081 case LTU: return ARM_CC;
17082 default: gcc_unreachable ();
17088 case GE: return ARM_GE;
17089 case LT: return ARM_LT;
17090 case GEU: return ARM_CS;
17091 case LTU: return ARM_CC;
17092 default: gcc_unreachable ();
17098 case NE: return ARM_NE;
17099 case EQ: return ARM_EQ;
17100 case GE: return ARM_GE;
17101 case GT: return ARM_GT;
17102 case LE: return ARM_LE;
17103 case LT: return ARM_LT;
17104 case GEU: return ARM_CS;
17105 case GTU: return ARM_HI;
17106 case LEU: return ARM_LS;
17107 case LTU: return ARM_CC;
17108 default: gcc_unreachable ();
17111 default: gcc_unreachable ();
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
17120 rtx first_insn = insn;
17121 rtx body = PATTERN (insn);
17123 enum arm_cond_code code;
17127 /* Remove the previous insn from the count of insns to be output. */
17128 if (arm_condexec_count)
17129 arm_condexec_count--;
17131 /* Nothing to do if we are already inside a conditional block. */
17132 if (arm_condexec_count)
17135 if (GET_CODE (body) != COND_EXEC)
17138 /* Conditional jumps are implemented directly. */
17139 if (GET_CODE (insn) == JUMP_INSN)
17142 predicate = COND_EXEC_TEST (body);
17143 arm_current_cc = get_arm_condition_code (predicate);
17145 n = get_attr_ce_count (insn);
17146 arm_condexec_count = 1;
17147 arm_condexec_mask = (1 << n) - 1;
17148 arm_condexec_masklen = n;
17149 /* See if subsequent instructions can be combined into the same block. */
17152 insn = next_nonnote_insn (insn);
17154 /* Jumping into the middle of an IT block is illegal, so a label or
17155 barrier terminates the block. */
17156 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17159 body = PATTERN (insn);
17160 /* USE and CLOBBER aren't really insns, so just skip them. */
17161 if (GET_CODE (body) == USE
17162 || GET_CODE (body) == CLOBBER)
17165 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17166 if (GET_CODE (body) != COND_EXEC)
17168 /* Allow up to 4 conditionally executed instructions in a block. */
17169 n = get_attr_ce_count (insn);
17170 if (arm_condexec_masklen + n > 4)
17173 predicate = COND_EXEC_TEST (body);
17174 code = get_arm_condition_code (predicate);
17175 mask = (1 << n) - 1;
17176 if (arm_current_cc == code)
17177 arm_condexec_mask |= (mask << arm_condexec_masklen);
17178 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17181 arm_condexec_count++;
17182 arm_condexec_masklen += n;
17184 /* A jump must be the last instruction in a conditional block. */
17185 if (GET_CODE(insn) == JUMP_INSN)
17188 /* Restore recog_data (getting the attributes of other insns can
17189 destroy this array, but final.c assumes that it remains intact
17190 across this call). */
17191 extract_constrain_insn_cached (first_insn);
17195 arm_final_prescan_insn (rtx insn)
17197 /* BODY will hold the body of INSN. */
17198 rtx body = PATTERN (insn);
17200 /* This will be 1 if trying to repeat the trick, and things need to be
17201 reversed if it appears to fail. */
17204 /* If we start with a return insn, we only succeed if we find another one. */
17205 int seeking_return = 0;
17207 /* START_INSN will hold the insn from where we start looking. This is the
17208 first insn after the following code_label if REVERSE is true. */
17209 rtx start_insn = insn;
17211 /* If in state 4, check if the target branch is reached, in order to
17212 change back to state 0. */
17213 if (arm_ccfsm_state == 4)
17215 if (insn == arm_target_insn)
17217 arm_target_insn = NULL;
17218 arm_ccfsm_state = 0;
17223 /* If in state 3, it is possible to repeat the trick, if this insn is an
17224 unconditional branch to a label, and immediately following this branch
17225 is the previous target label which is only used once, and the label this
17226 branch jumps to is not too far off. */
17227 if (arm_ccfsm_state == 3)
17229 if (simplejump_p (insn))
17231 start_insn = next_nonnote_insn (start_insn);
17232 if (GET_CODE (start_insn) == BARRIER)
17234 /* XXX Isn't this always a barrier? */
17235 start_insn = next_nonnote_insn (start_insn);
17237 if (GET_CODE (start_insn) == CODE_LABEL
17238 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17239 && LABEL_NUSES (start_insn) == 1)
17244 else if (GET_CODE (body) == RETURN)
17246 start_insn = next_nonnote_insn (start_insn);
17247 if (GET_CODE (start_insn) == BARRIER)
17248 start_insn = next_nonnote_insn (start_insn);
17249 if (GET_CODE (start_insn) == CODE_LABEL
17250 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17251 && LABEL_NUSES (start_insn) == 1)
17254 seeking_return = 1;
17263 gcc_assert (!arm_ccfsm_state || reverse);
17264 if (GET_CODE (insn) != JUMP_INSN)
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
17269 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17270 body = XVECEXP (body, 0, 0);
17273 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17274 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17277 int fail = FALSE, succeed = FALSE;
17278 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17279 int then_not_else = TRUE;
17280 rtx this_insn = start_insn, label = 0;
17282 /* Register the insn jumped to. */
17285 if (!seeking_return)
17286 label = XEXP (SET_SRC (body), 0);
17288 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17289 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17290 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17292 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17293 then_not_else = FALSE;
17295 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17296 seeking_return = 1;
17297 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17299 seeking_return = 1;
17300 then_not_else = FALSE;
17303 gcc_unreachable ();
17305 /* See how many insns this branch skips, and what kind of insns. If all
17306 insns are okay, and the label or unconditional branch to the same
17307 label is not too far away, succeed. */
17308 for (insns_skipped = 0;
17309 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17313 this_insn = next_nonnote_insn (this_insn);
17317 switch (GET_CODE (this_insn))
17320 /* Succeed if it is the target label, otherwise fail since
17321 control falls in from somewhere else. */
17322 if (this_insn == label)
17324 arm_ccfsm_state = 1;
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
17336 this_insn = next_nonnote_insn (this_insn);
17337 if (this_insn && this_insn == label)
17339 arm_ccfsm_state = 1;
17347 /* The AAPCS says that conditional calls should not be
17348 used since they make interworking inefficient (the
17349 linker can't transform BL<cond> into BLX). That's
17350 only a problem if the machine has BLX. */
	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
17361 if (this_insn && GET_CODE (this_insn) == BARRIER)
17362 this_insn = next_nonnote_insn (this_insn);
17364 if (this_insn && this_insn == label
17365 && insns_skipped < max_insns_skipped)
17367 arm_ccfsm_state = 1;
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */
17381 scanbody = PATTERN (this_insn);
17382 if (GET_CODE (scanbody) == SET
17383 && GET_CODE (SET_DEST (scanbody)) == PC)
17385 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17386 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17388 arm_ccfsm_state = 2;
17391 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17394 /* Fail if a conditional return is undesirable (e.g. on a
17395 StrongARM), but still allow this if optimizing for size. */
17396 else if (GET_CODE (scanbody) == RETURN
17397 && !use_return_insn (TRUE, NULL)
17400 else if (GET_CODE (scanbody) == RETURN
17403 arm_ccfsm_state = 2;
17406 else if (GET_CODE (scanbody) == PARALLEL)
17408 switch (get_attr_conds (this_insn))
17418 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
17425 scanbody = PATTERN (this_insn);
17426 if (!(GET_CODE (scanbody) == SET
17427 || GET_CODE (scanbody) == PARALLEL)
17428 || get_attr_conds (this_insn) != CONDS_NOCOND)
	      /* A conditional Cirrus instruction must be followed by
		 a non-Cirrus instruction.  However, since we
17433 conditionalize instructions in this function and by
17434 the time we get here we can't add instructions
17435 (nops), because shorten_branches() has already been
17436 called, we will disable conditionalizing Cirrus
17437 instructions to be safe. */
17438 if (GET_CODE (scanbody) != USE
17439 && GET_CODE (scanbody) != CLOBBER
17440 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17450 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17451 arm_target_label = CODE_LABEL_NUMBER (label);
17454 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17456 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17458 this_insn = next_nonnote_insn (this_insn);
17459 gcc_assert (!this_insn
17460 || (GET_CODE (this_insn) != BARRIER
17461 && GET_CODE (this_insn) != CODE_LABEL));
	      /* Oh, dear!  We ran off the end... give up.  */
17466 extract_constrain_insn_cached (insn);
17467 arm_ccfsm_state = 0;
17468 arm_target_insn = NULL;
17471 arm_target_insn = this_insn;
      /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	 what it was.  */
17479 if (reverse || then_not_else)
17480 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
17486 extract_constrain_insn_cached (insn);
17490 /* Output IT instructions. */
17492 thumb2_asm_output_opcode (FILE * stream)
17497 if (arm_condexec_mask)
17499 for (n = 0; n < arm_condexec_masklen; n++)
17500 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17502 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17503 arm_condition_codes[arm_current_cc]);
17504 arm_condexec_mask = 0;
17508 /* Returns true if REGNO is a valid register
17509 for holding a quantity of type MODE. */
17511 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17513 if (GET_MODE_CLASS (mode) == MODE_CC)
17514 return (regno == CC_REGNUM
17515 || (TARGET_HARD_FLOAT && TARGET_VFP
17516 && regno == VFPCC_REGNUM));
17519 /* For the Thumb we only allow values bigger than SImode in
17520 registers 0 - 6, so that there is always a second low
17521 register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
17523 start of an even numbered register pair. */
17524 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17526 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17527 && IS_CIRRUS_REGNUM (regno))
17528 /* We have outlawed SI values in Cirrus registers because they
17529 reside in the lower 32 bits, but SF values reside in the
17530 upper 32 bits. This causes gcc all sorts of grief. We can't
17531 even split the registers into pairs because Cirrus SI values
       get sign-extended to 64 bits -- aldyh.  */
17533 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17535 if (TARGET_HARD_FLOAT && TARGET_VFP
17536 && IS_VFP_REGNUM (regno))
17538 if (mode == SFmode || mode == SImode)
17539 return VFP_REGNO_OK_FOR_SINGLE (regno);
17541 if (mode == DFmode)
17542 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17544 /* VFP registers can hold HFmode values, but there is no point in
17545 putting them there unless we have hardware conversion insns. */
17546 if (mode == HFmode)
17547 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17550 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17551 || (VALID_NEON_QREG_MODE (mode)
17552 && NEON_REGNO_OK_FOR_QUAD (regno))
17553 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17554 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17555 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17556 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17557 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17562 if (TARGET_REALLY_IWMMXT)
17564 if (IS_IWMMXT_GR_REGNUM (regno))
17565 return mode == SImode;
17567 if (IS_IWMMXT_REGNUM (regno))
17568 return VALID_IWMMXT_REG_MODE (mode);
17571 /* We allow almost any value to be stored in the general registers.
17572 Restrict doubleword quantities to even register pairs so that we can
17573 use ldrd. Do not allow very large Neon structure opaque modes in
17574 general registers; they would use too many. */
17575 if (regno <= LAST_ARM_REGNUM)
17576 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17577 && ARM_NUM_REGS (mode) <= 4;
17579 if (regno == FRAME_POINTER_REGNUM
17580 || regno == ARG_POINTER_REGNUM)
17581 /* We only allow integers in the fake hard registers. */
17582 return GET_MODE_CLASS (mode) == MODE_INT;
17584 /* The only registers left are the FPA registers
17585 which we only allow to hold FP values. */
17586 return (TARGET_HARD_FLOAT && TARGET_FPA
17587 && GET_MODE_CLASS (mode) == MODE_FLOAT
17588 && regno >= FIRST_FPA_REGNUM
17589 && regno <= LAST_FPA_REGNUM);
17592 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17593 not used in arm mode. */
17596 arm_regno_class (int regno)
17600 if (regno == STACK_POINTER_REGNUM)
17602 if (regno == CC_REGNUM)
17609 if (TARGET_THUMB2 && regno < 8)
17612 if ( regno <= LAST_ARM_REGNUM
17613 || regno == FRAME_POINTER_REGNUM
17614 || regno == ARG_POINTER_REGNUM)
17615 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17617 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17618 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17620 if (IS_CIRRUS_REGNUM (regno))
17621 return CIRRUS_REGS;
17623 if (IS_VFP_REGNUM (regno))
17625 if (regno <= D7_VFP_REGNUM)
17626 return VFP_D0_D7_REGS;
17627 else if (regno <= LAST_LO_VFP_REGNUM)
17628 return VFP_LO_REGS;
17630 return VFP_HI_REGS;
17633 if (IS_IWMMXT_REGNUM (regno))
17634 return IWMMXT_REGS;
17636 if (IS_IWMMXT_GR_REGNUM (regno))
17637 return IWMMXT_GR_REGS;
17642 /* Handle a special case when computing the offset
17643 of an argument from the frame pointer. */
17645 arm_debugger_arg_offset (int value, rtx addr)
  /* We are only interested in the case where dbxout_parms() failed to
     compute the offset.  */
17653 /* We can only cope with the case where the address is held in a register. */
17654 if (GET_CODE (addr) != REG)
17657 /* If we are using the frame pointer to point at the argument, then
17658 an offset of 0 is correct. */
17659 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17662 /* If we are using the stack pointer to point at the
17663 argument, then an offset of 0 is correct. */
17664 /* ??? Check this is consistent with thumb2 frame layout. */
17665 if ((TARGET_THUMB || !frame_pointer_needed)
17666 && REGNO (addr) == SP_REGNUM)
17669 /* Oh dear. The argument is pointed to by a register rather
17670 than being held in a register, or being stored at a known
17671 offset from the frame pointer. Since GDB only understands
17672 those two kinds of argument we must translate the address
17673 held in the register into an offset from the frame pointer.
17674 We do this by searching through the insns for the function
17675 looking to see where this register gets its value. If the
17676 register is initialized from the frame pointer plus an offset
17677 then we are in luck and we can continue, otherwise we give up.
17679 This code is exercised by producing debugging information
17680 for a function with arguments like this:
17682 double func (double a, double b, int c, double d) {return d;}
17684 Without this code the stab for parameter 'd' will be set to
17685 an offset of 0 from the frame pointer, rather than 8. */
17687 /* The if() statement says:
17689 If the insn is a normal instruction
17690 and if the insn is setting the value in a register
17691 and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding a constant integer
     to a register which is the frame pointer,

     then we use that constant as the argument's offset.  */
17699 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17701 if ( GET_CODE (insn) == INSN
17702 && GET_CODE (PATTERN (insn)) == SET
17703 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17704 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17705 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17706 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17707 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17710 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17719 warning (0, "unable to compute real location of stacked parameter");
17720 value = 8; /* XXX magic hack */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)			\
  do								\
    {								\
      if ((MASK) & insn_flags)					\
	add_builtin_function ((NAME), (TYPE), (CODE),		\
			      BUILT_IN_MD, NULL, NULL_TREE);	\
    }								\
  while (0)
17735 struct builtin_description
17737 const unsigned int mask;
17738 const enum insn_code icode;
17739 const char * const name;
17740 const enum arm_builtins code;
17741 const enum rtx_code comparison;
17742 const unsigned int flag;
17745 static const struct builtin_description bdesc_2arg[] =
17747 #define IWMMXT_BUILTIN(code, string, builtin) \
17748 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17749 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17751 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17752 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17753 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17754 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17755 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17756 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17757 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17758 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17759 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17760 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17761 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17762 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17763 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17764 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17765 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17766 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17767 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17768 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17769 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17770 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17771 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17772 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17773 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17774 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17775 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17776 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17777 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17778 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17779 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17780 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17781 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17782 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17783 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17784 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17785 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17786 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17787 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17788 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17789 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17790 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17791 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17792 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17793 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17794 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17795 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17796 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17797 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17798 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17799 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17800 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17801 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17802 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17803 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17804 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17805 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17806 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17807 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17808 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17810 #define IWMMXT_BUILTIN2(code, builtin) \
17811 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17813 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17814 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17815 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17816 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17817 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17818 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17819 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17820 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17821 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17822 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17823 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17824 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17825 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17826 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17827 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17828 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17829 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17830 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17831 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17832 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17833 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17834 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17835 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17836 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17837 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17838 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17839 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17840 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17841 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17842 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17843 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17844 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17847 static const struct builtin_description bdesc_1arg[] =
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree int_ftype_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, long_long_integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, long_long_integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE,
                                      long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 tree_cons (NULL_TREE,
                                                            V4HI_type_node,
                                                            endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
                               ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
                               NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
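/* Usage sketch (user code): the TLS builtin registered above returns the
   thread pointer, e.g.

     void *tp = __builtin_thread_pointer ();

   It is marked nothrow and readonly, so repeated calls within a function
   may be combined.  */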
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP
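/* For illustration: UP (v8qi) token-pastes to v8qi_UP, i.e. T_V8QI, so an
   expression such as UP (v8qi) | UP (v16qi) builds the variant bitmask
   T_V8QI | T_V16QI used in the table below.  */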
#define T_MAX 13

typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
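/* For illustration: VAR2 (BINOP, vqdmull, v4hi, v2si) expands, via UP and
   CF, to the initializer

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. one neon_builtin_datum row covering two instruction variants.  */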
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
          vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
          vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
          v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};
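/* Note on naming: arm_init_neon_builtins below registers one function per
   variant as "__builtin_neon_<name><mode>"; the vadd entry above, for
   example, yields __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi, and so
   on for each mode listed.  */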
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];

  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);
  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
                                             "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                             "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                             "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
                                             "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
                                             "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
                                             "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
                                             "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
                                           TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
                                           TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
                                           TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
                                           TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
                                           TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
                                             "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
                                             "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
                                             "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
                                             "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
                                             "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
                                             "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
                                             "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
                              V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
                              V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
                              V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
                              V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
                              V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
                              V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
                              V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
                              V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
                              V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      unsigned int j;
      for (j = 0; j < 5; j++)
        {
          reinterp_ftype_dreg[i][j]
            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
          reinterp_ftype_qreg[i][j]
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
        }
    }
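  /* At this point reinterp_ftype_dreg[i][j] is the type "dreg_types[i]
     f (dreg_types[j])"; e.g. reinterp_ftype_dreg[0][4] is a function taking
     the DI type and returning V8QI, used below for
     __builtin_neon_vreinterpretv8qidi.  */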
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
        {
          const char* const modenames[] = {
            "v8qi", "v4hi", "v2si", "v2sf", "di",
            "v16qi", "v8hi", "v4si", "v4sf", "v2di"
          };
          char namebuf[60];
          tree ftype = NULL;
          enum insn_code icode;
          int is_load = 0, is_store = 0;

          if ((d->bits & (1 << j)) == 0)
            continue;

          icode = d->codes[codeidx++];

          switch (d->itype)
            {
            case NEON_LOAD1:
            case NEON_LOAD1LANE:
            case NEON_LOADSTRUCT:
            case NEON_LOADSTRUCTLANE:
              is_load = 1;
              /* Fall through.  */
            case NEON_STORE1:
            case NEON_STORE1LANE:
            case NEON_STORESTRUCT:
            case NEON_STORESTRUCTLANE:
              if (!is_load)
                is_store = 1;
              /* Fall through.  */
            case NEON_UNOP:
            case NEON_BINOP:
            case NEON_LOGICBINOP:
            case NEON_SHIFTINSERT:
            case NEON_TERNOP:
            case NEON_GETLANE:
            case NEON_SETLANE:
            case NEON_CREATE:
            case NEON_DUP:
            case NEON_DUPLANE:
            case NEON_SHIFTIMM:
            case NEON_SHIFTACC:
            case NEON_COMBINE:
            case NEON_SPLIT:
            case NEON_CONVERT:
            case NEON_FIXCONV:
            case NEON_LANEMUL:
            case NEON_LANEMULL:
            case NEON_LANEMULH:
            case NEON_LANEMAC:
            case NEON_SCALARMUL:
            case NEON_SCALARMULL:
            case NEON_SCALARMULH:
            case NEON_SCALARMAC:
            case NEON_SELECT:
            case NEON_VTBL:
            case NEON_VTBX:
              {
                int k;
                tree return_type = void_type_node, args = void_list_node;
                /* Build a function type directly from the insn_data for this
                   builtin.  The build_function_type() function takes care of
                   removing duplicates for us.  */
                for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
                  {
                    tree eltype;

                    if (is_load && k == 1)
                      {
                        /* Neon load patterns always have the memory operand
                           (a SImode pointer) in the operand 1 position.  We
                           want a const pointer to the element type in that
                           position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = const_intQI_pointer_node;
                            break;

                          case T_V4HI:
                          case T_V8HI:
                            eltype = const_intHI_pointer_node;
                            break;

                          case T_V2SI:
                          case T_V4SI:
                            eltype = const_intSI_pointer_node;
                            break;

                          case T_V2SF:
                          case T_V4SF:
                            eltype = const_float_pointer_node;
                            break;

                          case T_DI:
                          case T_V2DI:
                            eltype = const_intDI_pointer_node;
                            break;

                          default: gcc_unreachable ();
                          }
                      }
                    else if (is_store && k == 0)
                      {
                        /* Similarly, Neon store patterns use operand 0 as
                           the memory location to store to (a SImode pointer).
                           Use a pointer to the element type of the store in
                           that position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);

                        switch (1 << j)
                          {
                          case T_V8QI:
                          case T_V16QI:
                            eltype = intQI_pointer_node;
                            break;

                          case T_V4HI:
                          case T_V8HI:
                            eltype = intHI_pointer_node;
                            break;

                          case T_V2SI:
                          case T_V4SI:
                            eltype = intSI_pointer_node;
                            break;

                          case T_V2SF:
                          case T_V4SF:
                            eltype = float_pointer_node;
                            break;

                          case T_DI:
                          case T_V2DI:
                            eltype = intDI_pointer_node;
                            break;

                          default: gcc_unreachable ();
                          }
                      }
                    else
                      {
                        switch (insn_data[icode].operand[k].mode)
                          {
                          case VOIDmode: eltype = void_type_node; break;
                          /* Scalars.  */
                          case QImode: eltype = neon_intQI_type_node; break;
                          case HImode: eltype = neon_intHI_type_node; break;
                          case SImode: eltype = neon_intSI_type_node; break;
                          case SFmode: eltype = neon_float_type_node; break;
                          case DImode: eltype = neon_intDI_type_node; break;
                          case TImode: eltype = intTI_type_node; break;
                          case EImode: eltype = intEI_type_node; break;
                          case OImode: eltype = intOI_type_node; break;
                          case CImode: eltype = intCI_type_node; break;
                          case XImode: eltype = intXI_type_node; break;
                          /* 64-bit vectors.  */
                          case V8QImode: eltype = V8QI_type_node; break;
                          case V4HImode: eltype = V4HI_type_node; break;
                          case V2SImode: eltype = V2SI_type_node; break;
                          case V2SFmode: eltype = V2SF_type_node; break;
                          /* 128-bit vectors.  */
                          case V16QImode: eltype = V16QI_type_node; break;
                          case V8HImode: eltype = V8HI_type_node; break;
                          case V4SImode: eltype = V4SI_type_node; break;
                          case V4SFmode: eltype = V4SF_type_node; break;
                          case V2DImode: eltype = V2DI_type_node; break;
                          default: gcc_unreachable ();
                          }
                      }

                    if (k == 0 && !is_store)
                      return_type = eltype;
                    else
                      args = tree_cons (NULL_TREE, eltype, args);
                  }

                ftype = build_function_type (return_type, args);
              }
              break;
            case NEON_RESULTPAIR:
              {
                switch (insn_data[icode].operand[1].mode)
                  {
                  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
                  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
                  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
                  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
                  case DImode: ftype = void_ftype_pdi_di_di; break;
                  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
                  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
                  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
                  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
                  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
                  default: gcc_unreachable ();
                  }
              }
              break;
            case NEON_REINTERP:
              {
                /* We iterate over 5 doubleword types, then 5 quadword
                   types.  */
                int rhs = j % 5;
                switch (insn_data[icode].operand[0].mode)
                  {
                  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
                  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
                  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
                  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
                  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
                  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
                  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
                  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
                  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
                  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
                  default: gcc_unreachable ();
                  }
              }
              break;

            default:
              gcc_unreachable ();
            }
          gcc_assert (ftype != NULL);

          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

          add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
                                NULL_TREE);
        }
    }
}
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
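/* Usage sketch (user code): when an __fp16 format is selected (e.g. with
   -mfp16-format=ieee), the type registered above becomes usable as

     __fp16 h = 1.0f;

   with arithmetic done in float, as arm_promoted_type below arranges.  */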
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
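/* For example, a double-to-__fp16 conversion is rewritten by this hook as
   (__fp16) (float) d, so only single-precision <-> half-precision
   conversions need direct support; the float <-> double steps use the
   ordinary conversion paths.  */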
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
                               : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
                         tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key
    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
  neon_builtin_datum *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
             sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
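/* For illustration: function codes are allocated contiguously per table row
   (base_fcode .. base_fcode + num_vars - 1) by arm_init_neon_builtins, so
   the bsearch above finds the row containing FCODE and the remaining offset
   selects the per-mode insn code within that row.  */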
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
                      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
        break;
      else
        {
          arg[argc] = CALL_EXPR_ARG (exp, argc);
          op[argc] = expand_normal (arg[argc]);
          mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

          switch (thisarg)
            {
            case NEON_ARG_COPY_TO_REG:
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                    (op[argc], mode[argc]))
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
              break;

            case NEON_ARG_CONSTANT:
              /* FIXME: This error message is somewhat unhelpful.  */
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                    (op[argc], mode[argc]))
                error ("argument must be a constant");
              break;

            case NEON_ARG_STOP:
              gcc_unreachable ();
            }

          argc++;
        }
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (target, op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (target, op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
        pat = GEN_FCN (icode) (op[0]);
        break;

      case 2:
        pat = GEN_FCN (icode) (op[0], op[1]);
        break;

      case 3:
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
        break;

      case 4:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
        break;

      case 5:
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
        break;

      default:
        gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
                            rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
        {
          int good = 1;

          for (j = 0; good && j < count; j++)
            if (i != j && (copied & (1 << j)) == 0
                && reg_overlap_mentioned_p (src[j], dest[i]))
              good = 0;

          if (good && (copied & (1 << i)) == 0)
            {
              operands[opctr++] = dest[i];
              operands[opctr++] = src[i];
              copied |= 1 << i;
            }
        }
    }

  gcc_assert (opctr == count * 2);
}
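/* For illustration: with dest = {d1, d2} and src = {d2, d3}, the loop above
   orders the d1 <- d2 copy before d2 <- d3, since writing d2 first would
   clobber a still-needed source.  Genuinely cyclic copies (e.g. a swap)
   would never become "good" and are assumed not to occur here.  */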
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
19584 case ARM_BUILTIN_TEXTRMSB:
19585 case ARM_BUILTIN_TEXTRMUB:
19586 case ARM_BUILTIN_TEXTRMSH:
19587 case ARM_BUILTIN_TEXTRMUH:
19588 case ARM_BUILTIN_TEXTRMSW:
19589 case ARM_BUILTIN_TEXTRMUW:
19590 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19591 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19592 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19593 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19594 : CODE_FOR_iwmmxt_textrmw);
19596 arg0 = CALL_EXPR_ARG (exp, 0);
19597 arg1 = CALL_EXPR_ARG (exp, 1);
19598 op0 = expand_normal (arg0);
19599 op1 = expand_normal (arg1);
19600 tmode = insn_data[icode].operand[0].mode;
19601 mode0 = insn_data[icode].operand[1].mode;
19602 mode1 = insn_data[icode].operand[2].mode;
19604 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19605 op0 = copy_to_mode_reg (mode0, op0);
19606 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19608 /* @@@ better error message */
19609 error ("selector must be an immediate");
19610 return gen_reg_rtx (tmode);
19613 || GET_MODE (target) != tmode
19614 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19615 target = gen_reg_rtx (tmode);
19616 pat = GEN_FCN (icode) (target, op0, op1);
19622 case ARM_BUILTIN_TINSRB:
19623 case ARM_BUILTIN_TINSRH:
19624 case ARM_BUILTIN_TINSRW:
19625 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19626 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19627 : CODE_FOR_iwmmxt_tinsrw);
19628 arg0 = CALL_EXPR_ARG (exp, 0);
19629 arg1 = CALL_EXPR_ARG (exp, 1);
19630 arg2 = CALL_EXPR_ARG (exp, 2);
19631 op0 = expand_normal (arg0);
19632 op1 = expand_normal (arg1);
19633 op2 = expand_normal (arg2);
19634 tmode = insn_data[icode].operand[0].mode;
19635 mode0 = insn_data[icode].operand[1].mode;
19636 mode1 = insn_data[icode].operand[2].mode;
19637 mode2 = insn_data[icode].operand[3].mode;
19639 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19640 op0 = copy_to_mode_reg (mode0, op0);
19641 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19642 op1 = copy_to_mode_reg (mode1, op1);
19643 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19645 /* @@@ better error message */
19646 error ("selector must be an immediate");
19650 || GET_MODE (target) != tmode
19651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19652 target = gen_reg_rtx (tmode);
19653 pat = GEN_FCN (icode) (target, op0, op1, op2);
19659 case ARM_BUILTIN_SETWCX:
19660 arg0 = CALL_EXPR_ARG (exp, 0);
19661 arg1 = CALL_EXPR_ARG (exp, 1);
19662 op0 = force_reg (SImode, expand_normal (arg0));
19663 op1 = expand_normal (arg1);
19664 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19667 case ARM_BUILTIN_GETWCX:
19668 arg0 = CALL_EXPR_ARG (exp, 0);
19669 op0 = expand_normal (arg0);
19670 target = gen_reg_rtx (SImode);
19671 emit_insn (gen_iwmmxt_tmrc (target, op0));
19674 case ARM_BUILTIN_WSHUFH:
19675 icode = CODE_FOR_iwmmxt_wshufh;
19676 arg0 = CALL_EXPR_ARG (exp, 0);
19677 arg1 = CALL_EXPR_ARG (exp, 1);
19678 op0 = expand_normal (arg0);
19679 op1 = expand_normal (arg1);
19680 tmode = insn_data[icode].operand[0].mode;
19681 mode1 = insn_data[icode].operand[1].mode;
19682 mode2 = insn_data[icode].operand[2].mode;
19684 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19685 op0 = copy_to_mode_reg (mode1, op0);
19686 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19688 /* @@@ better error message */
19689 error ("mask must be an immediate");
19693 || GET_MODE (target) != tmode
19694 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19695 target = gen_reg_rtx (tmode);
19696 pat = GEN_FCN (icode) (target, op0, op1);
19702 case ARM_BUILTIN_WSADB:
19703 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19704 case ARM_BUILTIN_WSADH:
19705 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19706 case ARM_BUILTIN_WSADBZ:
19707 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19708 case ARM_BUILTIN_WSADHZ:
19709 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19711 /* Several three-argument builtins. */
19712 case ARM_BUILTIN_WMACS:
19713 case ARM_BUILTIN_WMACU:
19714 case ARM_BUILTIN_WALIGN:
19715 case ARM_BUILTIN_TMIA:
19716 case ARM_BUILTIN_TMIAPH:
19717 case ARM_BUILTIN_TMIATT:
19718 case ARM_BUILTIN_TMIATB:
19719 case ARM_BUILTIN_TMIABT:
19720 case ARM_BUILTIN_TMIABB:
19721 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19722 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19723 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19724 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19725 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19726 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19727 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19728 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19729 : CODE_FOR_iwmmxt_walign);
19730 arg0 = CALL_EXPR_ARG (exp, 0);
19731 arg1 = CALL_EXPR_ARG (exp, 1);
19732 arg2 = CALL_EXPR_ARG (exp, 2);
19733 op0 = expand_normal (arg0);
19734 op1 = expand_normal (arg1);
19735 op2 = expand_normal (arg2);
19736 tmode = insn_data[icode].operand[0].mode;
19737 mode0 = insn_data[icode].operand[1].mode;
19738 mode1 = insn_data[icode].operand[2].mode;
19739 mode2 = insn_data[icode].operand[3].mode;
19741 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19742 op0 = copy_to_mode_reg (mode0, op0);
19743 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19744 op1 = copy_to_mode_reg (mode1, op1);
19745 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19746 op2 = copy_to_mode_reg (mode2, op2);
19748 || GET_MODE (target) != tmode
19749 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19750 target = gen_reg_rtx (tmode);
19751 pat = GEN_FCN (icode) (target, op0, op1, op2);
19757 case ARM_BUILTIN_WZERO:
19758 target = gen_reg_rtx (DImode);
19759 emit_insn (gen_iwmmxt_clrdi (target));
19762 case ARM_BUILTIN_THREAD_POINTER:
19763 return arm_load_tp (target);
19769 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19770 if (d->code == (const enum arm_builtins) fcode)
19771 return arm_expand_binop_builtin (d->icode, exp, target);
19773 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19774 if (d->code == (const enum arm_builtins) fcode)
19775 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19777 /* @@@ Should really do something sensible here. */
19781 /* Return the number (counting from 0) of
19782 the least significant set bit in MASK. */
19785 number_of_first_bit_set (unsigned mask)
19790 (mask & (1 << bit)) == 0;
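/* Editorial note: for illustration only (not part of GCC), the loop above
   computes the same value as the GCC builtin count-trailing-zeros, which
   is undefined for a zero mask:

     static int
     first_bit_set (unsigned int mask)
     {
       return __builtin_ctz (mask);
     }

   e.g. mask 0x28 (binary 101000) yields 3.  */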
19797 /* Emit code to push or pop registers to or from the stack. F is the
19798 assembly file. MASK is the registers to push or pop. PUSH is
19799 nonzero if we should push, and zero if we should pop. For debugging
19800 output, if pushing, adjust CFA_OFFSET by the amount of space added
19801 to the stack. REAL_REGS should have the same number of bits set as
19802 MASK, and will be used instead (in the same order) to describe which
19803 registers were saved - this is used to mark the save slots when we
19804 push high registers after moving them to low registers. */
19806 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19807 unsigned long real_regs)
19810 int lo_mask = mask & 0xFF;
19811 int pushed_words = 0;
19815 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19817 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit. */
19819 thumb_exit (f, -1);
19823 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19825 fprintf (f, "\t.save\t{");
19826 for (regno = 0; regno < 15; regno++)
19828 if (real_regs & (1 << regno))
19830 if (real_regs & ((1 << regno) -1))
19832 asm_fprintf (f, "%r", regno);
19835 fprintf (f, "}\n");
19838 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19840 /* Look at the low registers first. */
19841 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19845 asm_fprintf (f, "%r", regno);
19847 if ((lo_mask & ~1) != 0)
19854 if (push && (mask & (1 << LR_REGNUM)))
19856 /* Catch pushing the LR. */
19860 asm_fprintf (f, "%r", LR_REGNUM);
19864 else if (!push && (mask & (1 << PC_REGNUM)))
19866 /* Catch popping the PC. */
19867 if (TARGET_INTERWORK || TARGET_BACKTRACE
19868 || crtl->calls_eh_return)
19870 /* The PC is never popped directly; instead
19871 it is popped into r3 and then BX is used. */
19872 fprintf (f, "}\n");
19874 thumb_exit (f, -1);
19883 asm_fprintf (f, "%r", PC_REGNUM);
19887 fprintf (f, "}\n");
19889 if (push && pushed_words && dwarf2out_do_frame ())
19891 char *l = dwarf2out_cfi_label (false);
19892 int pushed_mask = real_regs;
19894 *cfa_offset += pushed_words * 4;
19895 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19898 pushed_mask = real_regs;
19899 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19901 if (pushed_mask & 1)
19902 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
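/* Editorial note: an illustrative standalone sketch (not part of GCC) of
   how a bit mask turns into the register list printed above; the real
   code uses asm_fprintf's %r to print architectural names such as "lr":

     #include <stdio.h>

     static void
     print_reg_list (FILE *f, unsigned long mask, const char *op)
     {
       int regno, first = 1;

       fprintf (f, "\t%s\t{", op);
       for (regno = 0; regno < 16; regno++)
         if (mask & (1ul << regno))
           {
             fprintf (f, "%sr%d", first ? "" : ", ", regno);
             first = 0;
           }
       fprintf (f, "}\n");
     }

   print_reg_list (stdout, 0x4013, "push") prints "push {r0, r1, r4, r14}",
   r14 being LR.  */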
19907 /* Generate code to return from a thumb function.
19908 If 'reg_containing_return_addr' is -1, then the return address is
19909 actually on the stack, at the stack pointer. */
19911 thumb_exit (FILE *f, int reg_containing_return_addr)
19913 unsigned regs_available_for_popping;
19914 unsigned regs_to_pop;
19916 unsigned available;
19920 int restore_a4 = FALSE;
19922 /* Compute the registers we need to pop. */
19926 if (reg_containing_return_addr == -1)
19928 regs_to_pop |= 1 << LR_REGNUM;
19932 if (TARGET_BACKTRACE)
19934 /* Restore the (ARM) frame pointer and stack pointer. */
19935 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19939 /* If there is nothing to pop then just emit the BX instruction and return. */
19941 if (pops_needed == 0)
19943 if (crtl->calls_eh_return)
19944 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19946 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19949 /* Otherwise if we are not supporting interworking and we have not created
19950 a backtrace structure and the function was not entered in ARM mode then
19951 just pop the return address straight into the PC. */
19952 else if (!TARGET_INTERWORK
19953 && !TARGET_BACKTRACE
19954 && !is_called_in_ARM_mode (current_function_decl)
19955 && !crtl->calls_eh_return)
19957 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19961 /* Find out how many of the (return) argument registers we can corrupt. */
19962 regs_available_for_popping = 0;
19964 /* If returning via __builtin_eh_return, the bottom three registers
19965 all contain information needed for the return. */
19966 if (crtl->calls_eh_return)
19970 /* Deduce the registers used from the function's
19971 return value. This is more reliable than examining
19972 df_regs_ever_live_p () because that will be set if the register is
19973 ever used in the function, not just if the register is used
19974 to hold a return value. */
19976 if (crtl->return_rtx != 0)
19977 mode = GET_MODE (crtl->return_rtx);
19979 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19981 size = GET_MODE_SIZE (mode);
19985 /* In a void function we can use any argument register.
19986 In a function that returns a structure on the stack
19987 we can use the second and third argument registers. */
19988 if (mode == VOIDmode)
19989 regs_available_for_popping =
19990 (1 << ARG_REGISTER (1))
19991 | (1 << ARG_REGISTER (2))
19992 | (1 << ARG_REGISTER (3));
19994 regs_available_for_popping =
19995 (1 << ARG_REGISTER (2))
19996 | (1 << ARG_REGISTER (3));
19998 else if (size <= 4)
19999 regs_available_for_popping =
20000 (1 << ARG_REGISTER (2))
20001 | (1 << ARG_REGISTER (3));
20002 else if (size <= 8)
20003 regs_available_for_popping =
20004 (1 << ARG_REGISTER (3));
20007 /* Match registers to be popped with registers into which we pop them. */
20008 for (available = regs_available_for_popping,
20009 required = regs_to_pop;
20010 required != 0 && available != 0;
20011 available &= ~(available & - available),
20012 required &= ~(required & - required))
20015 /* If we have any popping registers left over, remove them. */
20017 regs_available_for_popping &= ~available;
20019 /* Otherwise if we need another popping register we can use
20020 the fourth argument register. */
20021 else if (pops_needed)
20023 /* If we have not found any free argument registers and
20024 reg a4 contains the return address, we must move it. */
20025 if (regs_available_for_popping == 0
20026 && reg_containing_return_addr == LAST_ARG_REGNUM)
20028 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20029 reg_containing_return_addr = LR_REGNUM;
20031 else if (size > 12)
20033 /* Register a4 is being used to hold part of the return value,
20034 but we have dire need of a free, low register. */
20037 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20040 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20042 /* The fourth argument register is available. */
20043 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20049 /* Pop as many registers as we can. */
20050 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20051 regs_available_for_popping);
20053 /* Process the registers we popped. */
20054 if (reg_containing_return_addr == -1)
20056 /* The return address was popped into the lowest numbered register. */
20057 regs_to_pop &= ~(1 << LR_REGNUM);
20059 reg_containing_return_addr =
20060 number_of_first_bit_set (regs_available_for_popping);
20062 /* Remove this register from the mask of available registers, so that
20063 the return address will not be corrupted by further pops. */
20064 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20067 /* If we popped other registers then handle them here. */
20068 if (regs_available_for_popping)
20072 /* Work out which register currently contains the frame pointer. */
20073 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20075 /* Move it into the correct place. */
20076 asm_fprintf (f, "\tmov\t%r, %r\n",
20077 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20079 /* (Temporarily) remove it from the mask of popped registers. */
20080 regs_available_for_popping &= ~(1 << frame_pointer);
20081 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20083 if (regs_available_for_popping)
20087 /* We popped the stack pointer as well,
20088 find the register that contains it. */
20089 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20091 /* Move it into the stack register. */
20092 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20094 /* At this point we have popped all necessary registers, so
20095 do not worry about restoring regs_available_for_popping
20096 to its correct value:
20098 assert (pops_needed == 0)
20099 assert (regs_available_for_popping == (1 << frame_pointer))
20100 assert (regs_to_pop == (1 << STACK_POINTER)) */
20104 /* Since we have just moved the popped value into the frame
20105 pointer, the popping register is available for reuse, and
20106 we know that we still have the stack pointer left to pop. */
20107 regs_available_for_popping |= (1 << frame_pointer);
20111 /* If we still have registers left on the stack, but we no longer have
20112 any registers into which we can pop them, then we must move the return
20113 address into the link register and make available the register that contained it. */
20115 if (regs_available_for_popping == 0 && pops_needed > 0)
20117 regs_available_for_popping |= 1 << reg_containing_return_addr;
20119 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20120 reg_containing_return_addr);
20122 reg_containing_return_addr = LR_REGNUM;
20125 /* If we have registers left on the stack then pop some more.
20126 We know that at most we will want to pop FP and SP. */
20127 if (pops_needed > 0)
20132 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20133 regs_available_for_popping);
20135 /* We have popped either FP or SP.
20136 Move whichever one it is into the correct register. */
20137 popped_into = number_of_first_bit_set (regs_available_for_popping);
20138 move_to = number_of_first_bit_set (regs_to_pop);
20140 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20142 regs_to_pop &= ~(1 << move_to);
20147 /* If we still have not popped everything then we must have only
20148 had one register available to us and we are now popping the SP. */
20149 if (pops_needed > 0)
20153 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20154 regs_available_for_popping);
20156 popped_into = number_of_first_bit_set (regs_available_for_popping);
20158 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20160 assert (regs_to_pop == (1 << STACK_POINTER))
20161 assert (pops_needed == 1)
20165 /* If necessary restore the a4 register. */
20168 if (reg_containing_return_addr != LR_REGNUM)
20170 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20171 reg_containing_return_addr = LR_REGNUM;
20174 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20177 if (crtl->calls_eh_return)
20178 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20180 /* Return to caller. */
20181 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20184 /* Scan INSN just before assembler is output for it.
20185 For Thumb-1, we track the status of the condition codes; this
20186 information is used in the cbranchsi4_insn pattern. */
20188 thumb1_final_prescan_insn (rtx insn)
20190 if (flag_print_asm_name)
20191 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20192 INSN_ADDRESSES (INSN_UID (insn)));
20193 /* Don't overwrite the previous setter when we get to a cbranch. */
20194 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20196 enum attr_conds conds;
20198 if (cfun->machine->thumb1_cc_insn)
20200 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20201 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20204 conds = get_attr_conds (insn);
20205 if (conds == CONDS_SET)
20207 rtx set = single_set (insn);
20208 cfun->machine->thumb1_cc_insn = insn;
20209 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20210 cfun->machine->thumb1_cc_op1 = const0_rtx;
20211 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20212 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20214 rtx src1 = XEXP (SET_SRC (set), 1);
20215 if (src1 == const0_rtx)
20216 cfun->machine->thumb1_cc_mode = CCmode;
20219 else if (conds != CONDS_NOCOND)
20220 cfun->machine->thumb1_cc_insn = NULL_RTX;
20225 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20227 unsigned HOST_WIDE_INT mask = 0xff;
20230 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20231 if (val == 0) /* XXX */
20234 for (i = 0; i < 25; i++)
20235 if ((val & (mask << i)) == val)
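/* Editorial note (illustration, not part of GCC): the test above accepts
   any value that is an 8-bit field shifted left by 0..24 bits, e.g.
   0x00ff0000 == 0xff << 16 passes, while 0x00ff00ff fails because no
   single shifted byte covers both set fields.  A minimal standalone
   equivalent:

     static int
     shiftable_const_p (unsigned int val)
     {
       int i;

       for (i = 0; i < 25; i++)
         if (val != 0 && (val & (0xffu << i)) == val)
           return 1;
       return 0;
     }
*/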
20241 /* Returns nonzero if the current function contains,
20242 or might contain a far jump. */
20244 thumb_far_jump_used_p (void)
20248 /* This test is only important for leaf functions. */
20249 /* assert (!leaf_function_p ()); */
20251 /* If we have already decided that far jumps may be used,
20252 do not bother checking again, and always return true even if
20253 it turns out that they are not being used. Once we have made
20254 the decision that far jumps are present (and that hence the link
20255 register will be pushed onto the stack) we cannot go back on it. */
20256 if (cfun->machine->far_jump_used)
20259 /* If this function is not being called from the prologue/epilogue
20260 generation code then it must be being called from the
20261 INITIAL_ELIMINATION_OFFSET macro. */
20262 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20264 /* In this case we know that we are being asked about the elimination
20265 of the arg pointer register. If that register is not being used,
20266 then there are no arguments on the stack, and we do not have to
20267 worry that a far jump might force the prologue to push the link
20268 register, changing the stack offsets. In this case we can just
20269 return false, since the presence of far jumps in the function will
20270 not affect stack offsets.
20272 If the arg pointer is live (or if it was live, but has now been
20273 eliminated and so set to dead) then we do have to test to see if
20274 the function might contain a far jump. This test can lead to some
20275 false negatives, since before reload is completed, the length of
20276 branch instructions is not known, so gcc defaults to returning their
20277 longest length, which in turn sets the far jump attribute to true.
20279 A false negative will not result in bad code being generated, but it
20280 will result in a needless push and pop of the link register. We
20281 hope that this does not occur too often.
20283 If we need doubleword stack alignment this could affect the other
20284 elimination offsets so we can't risk getting it wrong. */
20285 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20286 cfun->machine->arg_pointer_live = 1;
20287 else if (!cfun->machine->arg_pointer_live)
20291 /* Check to see if the function contains a branch
20292 insn with the far jump attribute set. */
20293 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20295 if (GET_CODE (insn) == JUMP_INSN
20296 /* Ignore tablejump patterns. */
20297 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20298 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20299 && get_attr_far_jump (insn) == FAR_JUMP_YES
20302 /* Record the fact that we have decided that
20303 the function does use far jumps. */
20304 cfun->machine->far_jump_used = 1;
20312 /* Return nonzero if FUNC must be entered in ARM mode. */
20314 is_called_in_ARM_mode (tree func)
20316 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20318 /* Ignore the problem about functions whose address is taken. */
20319 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20323 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20329 /* Given the stack offsets and register mask in OFFSETS, decide how
20330 many additional registers to push instead of subtracting a constant
20331 from SP. For epilogues the principle is the same except we use pop.
20332 FOR_PROLOGUE indicates which we're generating. */
20334 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20336 HOST_WIDE_INT amount;
20337 unsigned long live_regs_mask = offsets->saved_regs_mask;
20338 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction. */
20340 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20341 /* Then count how many other high registers will need to be pushed. */
20342 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20343 int n_free, reg_base;
20345 if (!for_prologue && frame_pointer_needed)
20346 amount = offsets->locals_base - offsets->saved_regs;
20348 amount = offsets->outgoing_args - offsets->saved_regs;
20350 /* If the stack frame size is 512 exactly, we can save one load
20351 instruction, which should make this a win even when optimizing for speed. */
20353 if (!optimize_size && amount != 512)
20356 /* Can't do this if there are high registers to push. */
20357 if (high_regs_pushed != 0)
20360 /* Shouldn't do it in the prologue if no registers would normally
20361 be pushed at all. In the epilogue, also allow it if we'll have
20362 a pop insn for the PC. */
20365 || TARGET_BACKTRACE
20366 || (live_regs_mask & 1 << LR_REGNUM) == 0
20367 || TARGET_INTERWORK
20368 || crtl->args.pretend_args_size != 0))
20371 /* Don't do this if thumb_expand_prologue wants to emit instructions
20372 between the push and the stack frame allocation. */
20374 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20375 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20382 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20383 live_regs_mask >>= reg_base;
20386 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20387 && (for_prologue || call_used_regs[reg_base + n_free]))
20389 live_regs_mask >>= 1;
20395 gcc_assert (amount / 4 * 4 == amount);
20397 if (amount >= 512 && (amount - n_free * 4) < 512)
20398 return (amount - 508) / 4;
20399 if (amount <= n_free * 4)
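/* Editorial worked example (not from the sources): with AMOUNT == 516 and
   N_FREE == 2 free low registers, 516 - 2*4 == 508 fits the single-insn
   limit, so (516 - 508) / 4 == 2 extra registers are pushed and popped
   instead of materialising the full constant in a register.  */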
20404 /* The bits which aren't usefully expanded as rtl. */
20406 thumb_unexpanded_epilogue (void)
20408 arm_stack_offsets *offsets;
20410 unsigned long live_regs_mask = 0;
20411 int high_regs_pushed = 0;
20413 int had_to_push_lr;
20416 if (cfun->machine->return_used_this_function != 0)
20419 if (IS_NAKED (arm_current_func_type ()))
20422 offsets = arm_get_frame_offsets ();
20423 live_regs_mask = offsets->saved_regs_mask;
20424 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20426 /* Deduce the registers used from the function's return value.
20427 This is more reliable than examining df_regs_ever_live_p () because that
20428 will be set if the register is ever used in the function, not just if
20429 the register is used to hold a return value. */
20430 size = arm_size_return_regs ();
20432 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20435 unsigned long extra_mask = (1 << extra_pop) - 1;
20436 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20439 /* The prolog may have pushed some high registers to use as
20440 work registers. e.g. the testsuite file:
20441 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20442 compiles to produce:
20443 push {r4, r5, r6, r7, lr}
20447 as part of the prolog. We have to undo that pushing here. */
20449 if (high_regs_pushed)
20451 unsigned long mask = live_regs_mask & 0xff;
20454 /* The available low registers depend on the size of the value we are returning. */
20462 /* Oh dear! We have no low registers into which we can pop high registers! */
20465 internal_error ("no low registers available for popping high registers");
20467 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20468 if (live_regs_mask & (1 << next_hi_reg))
20471 while (high_regs_pushed)
20473 /* Find lo register(s) into which the high register(s) can be popped. */
20475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20477 if (mask & (1 << regno))
20478 high_regs_pushed--;
20479 if (high_regs_pushed == 0)
20483 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20485 /* Pop the values into the low register(s). */
20486 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20488 /* Move the value(s) into the high registers. */
20489 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20491 if (mask & (1 << regno))
20493 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20496 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20497 if (live_regs_mask & (1 << next_hi_reg))
20502 live_regs_mask &= ~0x0f00;
20505 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20506 live_regs_mask &= 0xff;
20508 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20510 /* Pop the return address into the PC. */
20511 if (had_to_push_lr)
20512 live_regs_mask |= 1 << PC_REGNUM;
20514 /* Either no argument registers were pushed or a backtrace
20515 structure was created which includes an adjusted stack
20516 pointer, so just pop everything. */
20517 if (live_regs_mask)
20518 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20521 /* We have either just popped the return address into the
20522 PC or it was kept in LR for the entire function.
20523 Note that thumb_pushpop has already called thumb_exit if the
20524 PC was in the list. */
20525 if (!had_to_push_lr)
20526 thumb_exit (asm_out_file, LR_REGNUM);
20530 /* Pop everything but the return address. */
20531 if (live_regs_mask)
20532 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20535 if (had_to_push_lr)
20539 /* We have no free low regs, so save one. */
20540 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20544 /* Get the return address into a temporary register. */
20545 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20546 1 << LAST_ARG_REGNUM);
20550 /* Move the return address to lr. */
20551 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20553 /* Restore the low register. */
20554 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20559 regno = LAST_ARG_REGNUM;
20564 /* Remove the argument registers that were pushed onto the stack. */
20565 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20566 SP_REGNUM, SP_REGNUM,
20567 crtl->args.pretend_args_size);
20569 thumb_exit (asm_out_file, regno);
20575 /* Functions to save and restore machine-specific function data. */
20576 static struct machine_function *
20577 arm_init_machine_status (void)
20579 struct machine_function *machine;
20580 machine = ggc_alloc_cleared_machine_function ();
20582 #if ARM_FT_UNKNOWN != 0
20583 machine->func_type = ARM_FT_UNKNOWN;
20588 /* Return an RTX indicating where the return address to the
20589 calling function can be found. */
20591 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20596 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20599 /* Do anything needed before RTL is emitted for each function. */
20601 arm_init_expanders (void)
20603 /* Arrange to initialize and mark the machine per-function status. */
20604 init_machine_status = arm_init_machine_status;
20606 /* This is to stop the combine pass optimizing away the alignment
20607 adjustment of va_arg. */
20608 /* ??? It is claimed that this should not be necessary. */
20610 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20614 /* Like arm_compute_initial_elimination_offset. Simpler because there
20615 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20616 to point at the base of the local variables after static stack
20617 space for a function has been allocated. */
20620 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20622 arm_stack_offsets *offsets;
20624 offsets = arm_get_frame_offsets ();
20628 case ARG_POINTER_REGNUM:
20631 case STACK_POINTER_REGNUM:
20632 return offsets->outgoing_args - offsets->saved_args;
20634 case FRAME_POINTER_REGNUM:
20635 return offsets->soft_frame - offsets->saved_args;
20637 case ARM_HARD_FRAME_POINTER_REGNUM:
20638 return offsets->saved_regs - offsets->saved_args;
20640 case THUMB_HARD_FRAME_POINTER_REGNUM:
20641 return offsets->locals_base - offsets->saved_args;
20644 gcc_unreachable ();
20648 case FRAME_POINTER_REGNUM:
20651 case STACK_POINTER_REGNUM:
20652 return offsets->outgoing_args - offsets->soft_frame;
20654 case ARM_HARD_FRAME_POINTER_REGNUM:
20655 return offsets->saved_regs - offsets->soft_frame;
20657 case THUMB_HARD_FRAME_POINTER_REGNUM:
20658 return offsets->locals_base - offsets->soft_frame;
20661 gcc_unreachable ();
20666 gcc_unreachable ();
20670 /* Generate the rest of a function's prologue. */
20672 thumb1_expand_prologue (void)
20676 HOST_WIDE_INT amount;
20677 arm_stack_offsets *offsets;
20678 unsigned long func_type;
20680 unsigned long live_regs_mask;
20682 func_type = arm_current_func_type ();
20684 /* Naked functions don't have prologues. */
20685 if (IS_NAKED (func_type))
20688 if (IS_INTERRUPT (func_type))
20690 error ("interrupt Service Routines cannot be coded in Thumb mode");
20694 offsets = arm_get_frame_offsets ();
20695 live_regs_mask = offsets->saved_regs_mask;
20696 /* Load the pic register before setting the frame pointer,
20697 so we can use r7 as a temporary work register. */
20698 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20699 arm_load_pic_register (live_regs_mask);
20701 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20702 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20703 stack_pointer_rtx);
20705 if (flag_stack_usage)
20706 current_function_static_stack_size
20707 = offsets->outgoing_args - offsets->saved_args;
20709 amount = offsets->outgoing_args - offsets->saved_regs;
20710 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20715 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20716 GEN_INT (- amount)));
20717 RTX_FRAME_RELATED_P (insn) = 1;
20723 /* The stack decrement is too big for an immediate value in a single
20724 insn. In theory we could issue multiple subtracts, but after
20725 three of them it becomes more space efficient to place the full
20726 value in the constant pool and load into a register. (Also the
20727 ARM debugger really likes to see only one stack decrement per
20728 function). So instead we look for a scratch register into which
20729 we can load the decrement, and then we subtract this from the
20730 stack pointer. Unfortunately on the thumb the only available
20731 scratch registers are the argument registers, and we cannot use
20732 these as they may hold arguments to the function. Instead we
20733 attempt to locate a call preserved register which is used by this
20734 function. If we can find one, then we know that it will have
20735 been pushed at the start of the prologue and so we can corrupt it now. */
20737 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20738 if (live_regs_mask & (1 << regno))
20741 gcc_assert(regno <= LAST_LO_REGNUM);
20743 reg = gen_rtx_REG (SImode, regno);
20745 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20747 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20748 stack_pointer_rtx, reg));
20749 RTX_FRAME_RELATED_P (insn) = 1;
20750 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20751 plus_constant (stack_pointer_rtx,
20753 RTX_FRAME_RELATED_P (dwarf) = 1;
20754 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20758 if (frame_pointer_needed)
20759 thumb_set_frame_pointer (offsets);
20761 /* If we are profiling, make sure no instructions are scheduled before
20762 the call to mcount. Similarly if the user has requested no
20763 scheduling in the prolog. Similarly if we want non-call exceptions
20764 using the EABI unwinder, to prevent faulting instructions from being
20765 swapped with a stack adjustment. */
20766 if (crtl->profile || !TARGET_SCHED_PROLOG
20767 || (arm_except_unwind_info (&global_options) == UI_TARGET
20768 && cfun->can_throw_non_call_exceptions))
20769 emit_insn (gen_blockage ());
20771 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20772 if (live_regs_mask & 0xff)
20773 cfun->machine->lr_save_eliminated = 0;
20778 thumb1_expand_epilogue (void)
20780 HOST_WIDE_INT amount;
20781 arm_stack_offsets *offsets;
20784 /* Naked functions don't have epilogues. */
20785 if (IS_NAKED (arm_current_func_type ()))
20788 offsets = arm_get_frame_offsets ();
20789 amount = offsets->outgoing_args - offsets->saved_regs;
20791 if (frame_pointer_needed)
20793 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20794 amount = offsets->locals_base - offsets->saved_regs;
20796 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20798 gcc_assert (amount >= 0);
20802 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20803 GEN_INT (amount)));
20806 /* r3 is always free in the epilogue. */
20807 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20809 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20810 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20814 /* Emit a USE (stack_pointer_rtx), so that
20815 the stack adjustment will not be deleted. */
20816 emit_insn (gen_prologue_use (stack_pointer_rtx));
20818 if (crtl->profile || !TARGET_SCHED_PROLOG)
20819 emit_insn (gen_blockage ());
20821 /* Emit a clobber for each insn that will be restored in the epilogue,
20822 so that flow2 will get register lifetimes correct. */
20823 for (regno = 0; regno < 13; regno++)
20824 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20825 emit_clobber (gen_rtx_REG (SImode, regno));
20827 if (! df_regs_ever_live_p (LR_REGNUM))
20828 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20832 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20834 arm_stack_offsets *offsets;
20835 unsigned long live_regs_mask = 0;
20836 unsigned long l_mask;
20837 unsigned high_regs_pushed = 0;
20838 int cfa_offset = 0;
20841 if (IS_NAKED (arm_current_func_type ()))
20844 if (is_called_in_ARM_mode (current_function_decl))
20848 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20849 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20851 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20853 /* Generate code sequence to switch us into Thumb mode. */
20854 /* The .code 32 directive has already been emitted by
20855 ASM_DECLARE_FUNCTION_NAME. */
20856 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20857 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20859 /* Generate a label, so that the debugger will notice the
20860 change in instruction sets. This label is also used by
20861 the assembler to bypass the ARM code when this function
20862 is called from a Thumb encoded function elsewhere in the
20863 same file. Hence the definition of STUB_NAME here must
20864 agree with the definition in gas/config/tc-arm.c. */
20866 #define STUB_NAME ".real_start_of"
20868 fprintf (f, "\t.code\t16\n");
20870 if (arm_dllexport_name_p (name))
20871 name = arm_strip_name_encoding (name);
20873 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20874 fprintf (f, "\t.thumb_func\n");
20875 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20878 if (crtl->args.pretend_args_size)
20880 /* Output unwind directive for the stack adjustment. */
20881 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20882 fprintf (f, "\t.pad #%d\n",
20883 crtl->args.pretend_args_size);
20885 if (cfun->machine->uses_anonymous_args)
20889 fprintf (f, "\tpush\t{");
20891 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20893 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20894 regno <= LAST_ARG_REGNUM;
20896 asm_fprintf (f, "%r%s", regno,
20897 regno == LAST_ARG_REGNUM ? "" : ", ");
20899 fprintf (f, "}\n");
20902 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20903 SP_REGNUM, SP_REGNUM,
20904 crtl->args.pretend_args_size);
20906 /* We don't need to record the stores for unwinding (would it
20907 help the debugger any if we did?), but record the change in
20908 the stack pointer. */
20909 if (dwarf2out_do_frame ())
20911 char *l = dwarf2out_cfi_label (false);
20913 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20914 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20918 /* Get the registers we are going to push. */
20919 offsets = arm_get_frame_offsets ();
20920 live_regs_mask = offsets->saved_regs_mask;
20921 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20922 l_mask = live_regs_mask & 0x40ff;
20923 /* Then count how many other high registers will need to be pushed. */
20924 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20926 if (TARGET_BACKTRACE)
20929 unsigned work_register;
20931 /* We have been asked to create a stack backtrace structure.
20932 The code looks like this:
20936 0 sub SP, #16 Reserve space for 4 registers.
20937 2 push {R7} Push low registers.
20938 4 add R7, SP, #20 Get the stack pointer before the push.
20939 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20940 8 mov R7, PC Get hold of the start of this code plus 12.
20941 10 str R7, [SP, #16] Store it.
20942 12 mov R7, FP Get hold of the current frame pointer.
20943 14 str R7, [SP, #4] Store it.
20944 16 mov R7, LR Get hold of the current return address.
20945 18 str R7, [SP, #12] Store it.
20946 20 add R7, SP, #16 Point at the start of the backtrace structure.
20947 22 mov FP, R7 Put this value into the frame pointer. */
20949 work_register = thumb_find_work_register (live_regs_mask);
20951 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20952 asm_fprintf (f, "\t.pad #16\n");
20955 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20956 SP_REGNUM, SP_REGNUM);
20958 if (dwarf2out_do_frame ())
20960 char *l = dwarf2out_cfi_label (false);
20962 cfa_offset = cfa_offset + 16;
20963 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20968 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20969 offset = bit_count (l_mask) * UNITS_PER_WORD;
20974 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20975 offset + 16 + crtl->args.pretend_args_size);
20977 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20980 /* Make sure that the instruction fetching the PC is in the right place
20981 to calculate "start of backtrace creation code + 12". */
20984 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20985 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20987 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20988 ARM_HARD_FRAME_POINTER_REGNUM);
20989 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20994 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20995 ARM_HARD_FRAME_POINTER_REGNUM);
20996 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20998 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20999 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21003 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21004 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21006 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21008 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21009 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21011 /* Optimization: If we are not pushing any low registers but we are going
21012 to push some high registers then delay our first push. This will just
21013 be a push of LR and we can combine it with the push of the first high
21015 else if ((l_mask & 0xff) != 0
21016 || (high_regs_pushed == 0 && l_mask))
21018 unsigned long mask = l_mask;
21019 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21020 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21023 if (high_regs_pushed)
21025 unsigned pushable_regs;
21026 unsigned next_hi_reg;
21028 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21029 if (live_regs_mask & (1 << next_hi_reg))
21032 pushable_regs = l_mask & 0xff;
21034 if (pushable_regs == 0)
21035 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21037 while (high_regs_pushed > 0)
21039 unsigned long real_regs_mask = 0;
21041 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21043 if (pushable_regs & (1 << regno))
21045 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21047 high_regs_pushed --;
21048 real_regs_mask |= (1 << next_hi_reg);
21050 if (high_regs_pushed)
21052 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21054 if (live_regs_mask & (1 << next_hi_reg))
21059 pushable_regs &= ~((1 << regno) - 1);
21065 /* If we had to find a work register and we have not yet
21066 saved the LR then add it to the list of regs to push. */
21067 if (l_mask == (1 << LR_REGNUM))
21069 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21071 real_regs_mask | (1 << LR_REGNUM));
21075 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21080 /* Handle the case of a double word load into a low register from
21081 a computed memory address. The computed address may involve a
21082 register which is overwritten by the load. */
21084 thumb_load_double_from_address (rtx *operands)
21092 gcc_assert (GET_CODE (operands[0]) == REG);
21093 gcc_assert (GET_CODE (operands[1]) == MEM);
21095 /* Get the memory address. */
21096 addr = XEXP (operands[1], 0);
21098 /* Work out how the memory address is computed. */
21099 switch (GET_CODE (addr))
21102 operands[2] = adjust_address (operands[1], SImode, 4);
21104 if (REGNO (operands[0]) == REGNO (addr))
21106 output_asm_insn ("ldr\t%H0, %2", operands);
21107 output_asm_insn ("ldr\t%0, %1", operands);
21111 output_asm_insn ("ldr\t%0, %1", operands);
21112 output_asm_insn ("ldr\t%H0, %2", operands);
21117 /* Compute <address> + 4 for the high order load. */
21118 operands[2] = adjust_address (operands[1], SImode, 4);
21120 output_asm_insn ("ldr\t%0, %1", operands);
21121 output_asm_insn ("ldr\t%H0, %2", operands);
21125 arg1 = XEXP (addr, 0);
21126 arg2 = XEXP (addr, 1);
21128 if (CONSTANT_P (arg1))
21129 base = arg2, offset = arg1;
21131 base = arg1, offset = arg2;
21133 gcc_assert (GET_CODE (base) == REG);
21135 /* Catch the case of <address> = <reg> + <reg> */
21136 if (GET_CODE (offset) == REG)
21138 int reg_offset = REGNO (offset);
21139 int reg_base = REGNO (base);
21140 int reg_dest = REGNO (operands[0]);
21142 /* Add the base and offset registers together into the
21143 higher destination register. */
21144 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21145 reg_dest + 1, reg_base, reg_offset);
21147 /* Load the lower destination register from the address in
21148 the higher destination register. */
21149 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21150 reg_dest, reg_dest + 1);
21152 /* Load the higher destination register from its own address plus 4. */
21154 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21155 reg_dest + 1, reg_dest + 1);
21159 /* Compute <address> + 4 for the high order load. */
21160 operands[2] = adjust_address (operands[1], SImode, 4);
21162 /* If the computed address is held in the low order register
21163 then load the high order register first, otherwise always
21164 load the low order register first. */
21165 if (REGNO (operands[0]) == REGNO (base))
21167 output_asm_insn ("ldr\t%H0, %2", operands);
21168 output_asm_insn ("ldr\t%0, %1", operands);
21172 output_asm_insn ("ldr\t%0, %1", operands);
21173 output_asm_insn ("ldr\t%H0, %2", operands);
21179 /* With no registers to worry about we can just load the value directly. */
21181 operands[2] = adjust_address (operands[1], SImode, 4);
21183 output_asm_insn ("ldr\t%H0, %2", operands);
21184 output_asm_insn ("ldr\t%0, %1", operands);
21188 gcc_unreachable ();
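/* Editorial worked example (not from the sources): for a DImode load into
   the r0/r1 pair with the address held in r0, the high word is fetched
   first ("ldr r1, [r0, #4]" via %H0) so that the second load
   ("ldr r0, [r0]") may safely overwrite the address register.  */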
21195 thumb_output_move_mem_multiple (int n, rtx *operands)
21202 if (REGNO (operands[4]) > REGNO (operands[5]))
21205 operands[4] = operands[5];
21208 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21209 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21213 if (REGNO (operands[4]) > REGNO (operands[5]))
21216 operands[4] = operands[5];
21219 if (REGNO (operands[5]) > REGNO (operands[6]))
21222 operands[5] = operands[6];
21225 if (REGNO (operands[4]) > REGNO (operands[5]))
21228 operands[4] = operands[5];
21232 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21233 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21237 gcc_unreachable ();
21243 /* Output a call-via instruction for thumb state. */
21245 thumb_call_via_reg (rtx reg)
21247 int regno = REGNO (reg);
21250 gcc_assert (regno < LR_REGNUM);
21252 /* If we are in the normal text section we can use a single instance
21253 per compilation unit. If we are doing function sections, then we need
21254 an entry per section, since we can't rely on reachability. */
21255 if (in_section == text_section)
21257 thumb_call_reg_needed = 1;
21259 if (thumb_call_via_label[regno] == NULL)
21260 thumb_call_via_label[regno] = gen_label_rtx ();
21261 labelp = thumb_call_via_label + regno;
21265 if (cfun->machine->call_via[regno] == NULL)
21266 cfun->machine->call_via[regno] = gen_label_rtx ();
21267 labelp = cfun->machine->call_via + regno;
21270 output_asm_insn ("bl\t%a0", labelp);
21274 /* Routines for generating rtl. */
21276 thumb_expand_movmemqi (rtx *operands)
21278 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21279 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21280 HOST_WIDE_INT len = INTVAL (operands[2]);
21281 HOST_WIDE_INT offset = 0;
21285 emit_insn (gen_movmem12b (out, in, out, in));
21291 emit_insn (gen_movmem8b (out, in, out, in));
21297 rtx reg = gen_reg_rtx (SImode);
21298 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21299 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21306 rtx reg = gen_reg_rtx (HImode);
21307 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21308 plus_constant (in, offset))));
21309 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21317 rtx reg = gen_reg_rtx (QImode);
21318 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21319 plus_constant (in, offset))));
21320 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
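/* Editorial worked example (not from the sources): a copy of LEN == 27
   bytes is emitted as two 12-byte ldmia/stmia groups, leaving 3 bytes,
   which no longer fill an SImode word, so the tail is one HImode and one
   QImode move: 27 = 12 + 12 + 2 + 1.  */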
21326 thumb_reload_out_hi (rtx *operands)
21328 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21331 /* Handle reading a half-word from memory during reload. */
21333 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21335 gcc_unreachable ();
21338 /* Return the length of a function name prefix
21339 that starts with the character 'c'. */
21341 arm_get_strip_length (int c)
21345 ARM_NAME_ENCODING_LENGTHS
21350 /* Return a pointer to a function's name with any
21351 and all prefix encodings stripped from it. */
21353 arm_strip_name_encoding (const char *name)
21357 while ((skip = arm_get_strip_length (* name)))
21363 /* If there is a '*' anywhere in the name's prefix, then
21364 emit the stripped name verbatim, otherwise prepend an
21365 underscore if leading underscores are being used. */
21367 arm_asm_output_labelref (FILE *stream, const char *name)
21372 while ((skip = arm_get_strip_length (* name)))
21374 verbatim |= (*name == '*');
21379 fputs (name, stream);
21381 asm_fprintf (stream, "%U%s", name);
21385 arm_file_start (void)
21389 if (TARGET_UNIFIED_ASM)
21390 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21394 const char *fpu_name;
21395 if (arm_selected_arch)
21396 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21398 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21400 if (TARGET_SOFT_FLOAT)
21403 fpu_name = "softvfp";
21405 fpu_name = "softfpa";
21409 fpu_name = arm_fpu_desc->name;
21410 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21412 if (TARGET_HARD_FLOAT)
21413 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21414 if (TARGET_HARD_FLOAT_ABI)
21415 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21418 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21420 /* Some of these attributes only apply when the corresponding features
21421 are used. However we don't have any easy way of figuring this out.
21422 Conservatively record the setting that would have been used. */
21424 /* Tag_ABI_FP_rounding. */
21425 if (flag_rounding_math)
21426 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21427 if (!flag_unsafe_math_optimizations)
21429 /* Tag_ABI_FP_denormal. */
21430 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21431 /* Tag_ABI_FP_exceptions. */
21432 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21434 /* Tag_ABI_FP_user_exceptions. */
21435 if (flag_signaling_nans)
21436 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21437 /* Tag_ABI_FP_number_model. */
21438 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21439 flag_finite_math_only ? 1 : 3);
21441 /* Tag_ABI_align8_needed. */
21442 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21443 /* Tag_ABI_align8_preserved. */
21444 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21445 /* Tag_ABI_enum_size. */
21446 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21447 flag_short_enums ? 1 : 2);
21449 /* Tag_ABI_optimization_goals. */
21452 else if (optimize >= 2)
21458 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21460 /* Tag_ABI_FP_16bit_format. */
21461 if (arm_fp16_format)
21462 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21463 (int)arm_fp16_format);
21465 if (arm_lang_output_object_attributes_hook)
21466 arm_lang_output_object_attributes_hook();
21468 default_file_start();
21472 arm_file_end (void)
21476 if (NEED_INDICATE_EXEC_STACK)
21477 /* Add .note.GNU-stack. */
21478 file_end_indicate_exec_stack ();
21480 if (! thumb_call_reg_needed)
21483 switch_to_section (text_section);
21484 asm_fprintf (asm_out_file, "\t.code 16\n");
21485 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21487 for (regno = 0; regno < LR_REGNUM; regno++)
21489 rtx label = thumb_call_via_label[regno];
21493 targetm.asm_out.internal_label (asm_out_file, "L",
21494 CODE_LABEL_NUMBER (label));
21495 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21501 /* Symbols in the text segment can be accessed without indirecting via the
21502 constant pool; it may take an extra binary operation, but this is still
21503 faster than indirecting via memory. Don't do this when not optimizing,
21504 since we won't be calculating all of the offsets necessary to do this comparison. */
21508 arm_encode_section_info (tree decl, rtx rtl, int first)
21510 if (optimize > 0 && TREE_CONSTANT (decl))
21511 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21513 default_encode_section_info (decl, rtl, first);
21515 #endif /* !ARM_PE */
21518 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21520 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21521 && !strcmp (prefix, "L"))
21523 arm_ccfsm_state = 0;
21524 arm_target_insn = NULL;
21526 default_internal_label (stream, prefix, labelno);
21529 /* Output code to add DELTA to the first argument, and then jump
21530 to FUNCTION. Used for C++ multiple inheritance. */
21532 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21533 HOST_WIDE_INT delta,
21534 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21537 static int thunk_label = 0;
21540 int mi_delta = delta;
21541 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21543 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21546 mi_delta = - mi_delta;
21550 int labelno = thunk_label++;
21551 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21552 /* Thunks are entered in ARM mode when available. */
21553 if (TARGET_THUMB1_ONLY)
21555 /* push r3 so we can use it as a temporary. */
21556 /* TODO: Omit this save if r3 is not used. */
21557 fputs ("\tpush {r3}\n", file);
21558 fputs ("\tldr\tr3, ", file);
21562 fputs ("\tldr\tr12, ", file);
21564 assemble_name (file, label);
21565 fputc ('\n', file);
21568 /* If we are generating PIC, the ldr instruction below loads
21569 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21570 the address of the add + 8, so we have:
21572 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21574 = target + 1.
21575 Note that we have "+ 1" because some versions of GNU ld
21576 don't set the low bit of the result for R_ARM_REL32
21577 relocations against thumb function symbols.
21578 On ARMv6M this is +4, not +8. */
21579 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21580 assemble_name (file, labelpc);
21581 fputs (":\n", file);
21582 if (TARGET_THUMB1_ONLY)
21584 /* This is 2 insns after the start of the thunk, so we know it
21585 is 4-byte aligned. */
21586 fputs ("\tadd\tr3, pc, r3\n", file);
21587 fputs ("\tmov r12, r3\n", file);
21590 fputs ("\tadd\tr12, pc, r12\n", file);
21592 else if (TARGET_THUMB1_ONLY)
21593 fputs ("\tmov r12, r3\n", file);
21595 if (TARGET_THUMB1_ONLY)
21597 if (mi_delta > 255)
21599 fputs ("\tldr\tr3, ", file);
21600 assemble_name (file, label);
21601 fputs ("+4\n", file);
21602 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21603 mi_op, this_regno, this_regno);
21605 else if (mi_delta != 0)
21607 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21608 mi_op, this_regno, this_regno,
21614 /* TODO: Use movw/movt for large constants when available. */
21615 while (mi_delta != 0)
21617 if ((mi_delta & (3 << shift)) == 0)
21621 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21622 mi_op, this_regno, this_regno,
21623 mi_delta & (0xff << shift));
21624 mi_delta &= ~(0xff << shift);
21631 if (TARGET_THUMB1_ONLY)
21632 fputs ("\tpop\t{r3}\n", file);
21634 fprintf (file, "\tbx\tr12\n");
21635 ASM_OUTPUT_ALIGN (file, 2);
21636 assemble_name (file, label);
21637 fputs (":\n", file);
21640 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21641 rtx tem = XEXP (DECL_RTL (function), 0);
21642 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21643 tem = gen_rtx_MINUS (GET_MODE (tem),
21645 gen_rtx_SYMBOL_REF (Pmode,
21646 ggc_strdup (labelpc)));
21647 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21650 /* Output ".word .LTHUNKn". */
21651 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21653 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21654 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21658 fputs ("\tb\t", file);
21659 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21660 if (NEED_PLT_RELOC)
21661 fputs ("(PLT)", file);
21662 fputc ('\n', file);
21667 arm_emit_vector_const (FILE *file, rtx x)
21670 const char * pattern;
21672 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21674 switch (GET_MODE (x))
21676 case V2SImode: pattern = "%08x"; break;
21677 case V4HImode: pattern = "%04x"; break;
21678 case V8QImode: pattern = "%02x"; break;
21679 default: gcc_unreachable ();
21682 fprintf (file, "0x");
21683 for (i = CONST_VECTOR_NUNITS (x); i--;)
21687 element = CONST_VECTOR_ELT (x, i);
21688 fprintf (file, pattern, INTVAL (element));
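/* For example, a V4HImode constant whose lanes hold {1, 2, 3, 4} is
   printed as "0x0004000300020001": the loop above emits the
   highest-numbered lane first, four hex digits per HImode element.  */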
21694 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21695 HFmode constant pool entries are actually loaded with ldr. */
21697 arm_emit_fp16_const (rtx c)
21702 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21703 bits = real_to_target (NULL, &r, HFmode);
21704 if (WORDS_BIG_ENDIAN)
21705 assemble_zeros (2);
21706 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21707 if (!WORDS_BIG_ENDIAN)
21708 assemble_zeros (2);
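/* For example, the HFmode constant 1.0 has the bit pattern 0x3c00, so a
   little-endian target emits the halfword 0x3c00 followed by two bytes
   of zero padding, while a big-endian target emits the padding first.  */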
21712 arm_output_load_gr (rtx *operands)
21719 if (GET_CODE (operands [1]) != MEM
21720 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21721 || GET_CODE (reg = XEXP (sum, 0)) != REG
21722 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21723 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21724 return "wldrw%?\t%0, %1";
21726 /* Fix up an out-of-range load of a GR register. */
21727 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21728 wcgr = operands[0];
21730 output_asm_insn ("ldr%?\t%0, %1", operands);
21732 operands[0] = wcgr;
21734 output_asm_insn ("tmcr%?\t%0, %1", operands);
21735 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
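/* A sketch of the expansion for a hypothetical out-of-range offset of
   2048, with r1 as the base register and wcgr0 as the destination:

	str	r1, [sp, #-4]!	@ Start of GR load expansion
	ldr	r1, [r1, #2048]
	tmcr	wcgr0, r1
	ldr	r1, [sp], #4	@ End of GR load expansion

   The base register doubles as the scratch register and is restored
   from the stack afterwards.  */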
21740 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21742 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21743 named arg and all anonymous args onto the stack.
21744 XXX I know the prologue shouldn't be pushing registers, but it is faster
21745 that way. */
21748 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21749 enum machine_mode mode,
21752 int second_time ATTRIBUTE_UNUSED)
21756 cfun->machine->uses_anonymous_args = 1;
21757 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21759 nregs = pcum->aapcs_ncrn;
21760 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21764 nregs = pcum->nregs;
21766 if (nregs < NUM_ARG_REGS)
21767 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
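/* A worked example, assuming the AAPCS variant and no doubleword
   alignment requirement: if the named arguments consume r0 and r1, then
   nregs is 2 and *pretend_size becomes (4 - 2) * 4 = 8, so the prologue
   pushes r2 and r3 immediately below any stack-passed arguments.  */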
21770 /* Return nonzero if the CONSUMER instruction (a store) does not need
21771 PRODUCER's value to calculate the address. */
21774 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21776 rtx value = PATTERN (producer);
21777 rtx addr = PATTERN (consumer);
21779 if (GET_CODE (value) == COND_EXEC)
21780 value = COND_EXEC_CODE (value);
21781 if (GET_CODE (value) == PARALLEL)
21782 value = XVECEXP (value, 0, 0);
21783 value = XEXP (value, 0);
21784 if (GET_CODE (addr) == COND_EXEC)
21785 addr = COND_EXEC_CODE (addr);
21786 if (GET_CODE (addr) == PARALLEL)
21787 addr = XVECEXP (addr, 0, 0);
21788 addr = XEXP (addr, 0);
21790 return !reg_overlap_mentioned_p (value, addr);
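/* For example, with the producer (set (reg:SI r1) ...) and the consumer
   (set (mem:SI (plus:SI (reg:SI r2) (const_int 4))) (reg:SI r1)), the
   stored value uses r1 but the address only uses r2, so this returns
   nonzero (register numbers illustrative).  */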
21793 /* Return nonzero if the CONSUMER instruction (a store) does need
21794 PRODUCER's value to calculate the address. */
21797 arm_early_store_addr_dep (rtx producer, rtx consumer)
21799 return !arm_no_early_store_addr_dep (producer, consumer);
21802 /* Return nonzero if the CONSUMER instruction (a load) does need
21803 PRODUCER's value to calculate the address. */
21806 arm_early_load_addr_dep (rtx producer, rtx consumer)
21808 rtx value = PATTERN (producer);
21809 rtx addr = PATTERN (consumer);
21811 if (GET_CODE (value) == COND_EXEC)
21812 value = COND_EXEC_CODE (value);
21813 if (GET_CODE (value) == PARALLEL)
21814 value = XVECEXP (value, 0, 0);
21815 value = XEXP (value, 0);
21816 if (GET_CODE (addr) == COND_EXEC)
21817 addr = COND_EXEC_CODE (addr);
21818 if (GET_CODE (addr) == PARALLEL)
21819 addr = XVECEXP (addr, 0, 0);
21820 addr = XEXP (addr, 1);
21822 return reg_overlap_mentioned_p (value, addr);
21825 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21826 have an early register shift value or amount dependency on the
21827 result of PRODUCER. */
21830 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21832 rtx value = PATTERN (producer);
21833 rtx op = PATTERN (consumer);
21836 if (GET_CODE (value) == COND_EXEC)
21837 value = COND_EXEC_CODE (value);
21838 if (GET_CODE (value) == PARALLEL)
21839 value = XVECEXP (value, 0, 0);
21840 value = XEXP (value, 0);
21841 if (GET_CODE (op) == COND_EXEC)
21842 op = COND_EXEC_CODE (op);
21843 if (GET_CODE (op) == PARALLEL)
21844 op = XVECEXP (op, 0, 0);
21847 early_op = XEXP (op, 0);
21848 /* This is either an actual independent shift, or a shift applied to
21849 the first operand of another operation. We want the whole shift
21850 operation. */
21851 if (GET_CODE (early_op) == REG)
21854 return !reg_overlap_mentioned_p (value, early_op);
21857 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21858 have an early register shift value dependency on the result of
21859 PRODUCER. */
21862 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21864 rtx value = PATTERN (producer);
21865 rtx op = PATTERN (consumer);
21868 if (GET_CODE (value) == COND_EXEC)
21869 value = COND_EXEC_CODE (value);
21870 if (GET_CODE (value) == PARALLEL)
21871 value = XVECEXP (value, 0, 0);
21872 value = XEXP (value, 0);
21873 if (GET_CODE (op) == COND_EXEC)
21874 op = COND_EXEC_CODE (op);
21875 if (GET_CODE (op) == PARALLEL)
21876 op = XVECEXP (op, 0, 0);
21879 early_op = XEXP (op, 0);
21881 /* This is either an actual independent shift, or a shift applied to
21882 the first operand of another operation. We want the value being
21883 shifted, in either case. */
21884 if (GET_CODE (early_op) != REG)
21885 early_op = XEXP (early_op, 0);
21887 return !reg_overlap_mentioned_p (value, early_op);
21890 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21891 have an early register mult dependency on the result of
21892 PRODUCER. */
21895 arm_no_early_mul_dep (rtx producer, rtx consumer)
21897 rtx value = PATTERN (producer);
21898 rtx op = PATTERN (consumer);
21900 if (GET_CODE (value) == COND_EXEC)
21901 value = COND_EXEC_CODE (value);
21902 if (GET_CODE (value) == PARALLEL)
21903 value = XVECEXP (value, 0, 0);
21904 value = XEXP (value, 0);
21905 if (GET_CODE (op) == COND_EXEC)
21906 op = COND_EXEC_CODE (op);
21907 if (GET_CODE (op) == PARALLEL)
21908 op = XVECEXP (op, 0, 0);
21911 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21913 if (GET_CODE (XEXP (op, 0)) == MULT)
21914 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21916 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21922 /* We can't rely on the caller doing the proper promotion when
21923 using APCS or ATPCS. */
21926 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21928 return !TARGET_AAPCS_BASED;
21931 static enum machine_mode
21932 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21933 enum machine_mode mode,
21934 int *punsignedp ATTRIBUTE_UNUSED,
21935 const_tree fntype ATTRIBUTE_UNUSED,
21936 int for_return ATTRIBUTE_UNUSED)
21938 if (GET_MODE_CLASS (mode) == MODE_INT
21939 && GET_MODE_SIZE (mode) < 4)
21945 /* AAPCS based ABIs use short enums by default. */
21948 arm_default_short_enums (void)
21950 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21954 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21957 arm_align_anon_bitfield (void)
21959 return TARGET_AAPCS_BASED;
21963 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21966 arm_cxx_guard_type (void)
21968 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21971 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21972 has an accumulator dependency on the result of the producer (a
21973 multiplication instruction) and no other dependency on that result. */
21975 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21977 rtx mul = PATTERN (producer);
21978 rtx mac = PATTERN (consumer);
21980 rtx mac_op0, mac_op1, mac_acc;
21982 if (GET_CODE (mul) == COND_EXEC)
21983 mul = COND_EXEC_CODE (mul);
21984 if (GET_CODE (mac) == COND_EXEC)
21985 mac = COND_EXEC_CODE (mac);
21987 /* Check that mul is of the form (set (...) (mult ...))
21988 and mla is of the form (set (...) (plus (mult ...) (...))). */
21989 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21990 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21991 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21994 mul_result = XEXP (mul, 0);
21995 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21996 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21997 mac_acc = XEXP (XEXP (mac, 1), 1);
21999 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22000 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22001 && !reg_overlap_mentioned_p (mul_result, mac_op1));
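/* For example (registers invented):

	mul	r1, r2, r3
	mla	r0, r4, r5, r1	@ r1 is only the accumulator: true
	mla	r0, r1, r4, r5	@ r1 is a multiplicand, r5 the accumulator: false
 */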
22005 /* The EABI says test the least significant bit of a guard variable. */
22008 arm_cxx_guard_mask_bit (void)
22010 return TARGET_AAPCS_BASED;
22014 /* The EABI specifies that all array cookies are 8 bytes long. */
22017 arm_get_cookie_size (tree type)
22021 if (!TARGET_AAPCS_BASED)
22022 return default_cxx_get_cookie_size (type);
22024 size = build_int_cst (sizetype, 8);
22029 /* The EABI says that array cookies should also contain the element size. */
22032 arm_cookie_has_size (void)
22034 return TARGET_AAPCS_BASED;
22038 /* The EABI says constructors and destructors should return a pointer to
22039 the object constructed/destroyed. */
22042 arm_cxx_cdtor_returns_this (void)
22044 return TARGET_AAPCS_BASED;
22047 /* The EABI says that an inline function may never be the key
22048 method. */
22051 arm_cxx_key_method_may_be_inline (void)
22053 return !TARGET_AAPCS_BASED;
22057 arm_cxx_determine_class_data_visibility (tree decl)
22059 if (!TARGET_AAPCS_BASED
22060 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22063 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22064 is exported. However, on systems without dynamic vague linkage,
22065 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22066 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22067 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22069 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22070 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22074 arm_cxx_class_data_always_comdat (void)
22076 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22077 vague linkage if the class has no key function. */
22078 return !TARGET_AAPCS_BASED;
22082 /* The EABI says __aeabi_atexit should be used to register static
22086 arm_cxx_use_aeabi_atexit (void)
22088 return TARGET_AAPCS_BASED;
22093 arm_set_return_address (rtx source, rtx scratch)
22095 arm_stack_offsets *offsets;
22096 HOST_WIDE_INT delta;
22098 unsigned long saved_regs;
22100 offsets = arm_get_frame_offsets ();
22101 saved_regs = offsets->saved_regs_mask;
22103 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22104 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22107 if (frame_pointer_needed)
22108 addr = plus_constant(hard_frame_pointer_rtx, -4);
22111 /* LR will be the first saved register. */
22112 delta = offsets->outgoing_args - (offsets->frame + 4);
22117 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22118 GEN_INT (delta & ~4095)));
22123 addr = stack_pointer_rtx;
22125 addr = plus_constant (addr, delta);
22127 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22133 thumb_set_return_address (rtx source, rtx scratch)
22135 arm_stack_offsets *offsets;
22136 HOST_WIDE_INT delta;
22137 HOST_WIDE_INT limit;
22140 unsigned long mask;
22144 offsets = arm_get_frame_offsets ();
22145 mask = offsets->saved_regs_mask;
22146 if (mask & (1 << LR_REGNUM))
22149 /* Find the saved regs. */
22150 if (frame_pointer_needed)
22152 delta = offsets->soft_frame - offsets->saved_args;
22153 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22159 delta = offsets->outgoing_args - offsets->saved_args;
22162 /* Allow for the stack frame. */
22163 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22165 /* The link register is always the first saved register. */
22168 /* Construct the address. */
22169 addr = gen_rtx_REG (SImode, reg);
22172 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22173 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22177 addr = plus_constant (addr, delta);
22179 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22182 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22185 /* Implements target hook vector_mode_supported_p. */
22187 arm_vector_mode_supported_p (enum machine_mode mode)
22189 /* Neon also supports V2SImode, etc. listed in the clause below. */
22190 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22191 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22194 if ((TARGET_NEON || TARGET_IWMMXT)
22195 && ((mode == V2SImode)
22196 || (mode == V4HImode)
22197 || (mode == V8QImode)))
22203 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22204 registers when autovectorizing for Neon, at least until multiple vector
22205 widths are supported properly by the middle-end. */
22207 static enum machine_mode
22208 arm_preferred_simd_mode (enum machine_mode mode)
22214 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22216 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22218 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22220 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22222 if (TARGET_NEON_VECTORIZE_QUAD)
22229 if (TARGET_REALLY_IWMMXT)
22245 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22247 We need to define this for LO_REGS on thumb. Otherwise we can end up
22248 using r0-r4 for function arguments, r7 for the stack frame and don't
22249 have enough left over to do doubleword arithmetic. */
22252 arm_class_likely_spilled_p (reg_class_t rclass)
22254 if ((TARGET_THUMB && rclass == LO_REGS)
22255 || rclass == CC_REG)
22261 /* Implements target hook small_register_classes_for_mode_p. */
22263 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22265 return TARGET_THUMB1;
22268 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22269 ARM insns and therefore guarantee that the shift count is modulo 256.
22270 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22271 guarantee no particular behavior for out-of-range counts. */
22273 static unsigned HOST_WIDE_INT
22274 arm_shift_truncation_mask (enum machine_mode mode)
22276 return mode == SImode ? 255 : 0;
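/* For example, since SImode shift counts are taken modulo 256, shifting
   by a register holding 260 behaves exactly like shifting by 4, and the
   middle end may delete an explicit "count & 255" before the shift.  */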
22280 /* Map internal gcc register numbers to DWARF2 register numbers. */
22283 arm_dbx_register_number (unsigned int regno)
22288 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22289 compatibility. The EABI defines them as registers 96-103. */
22290 if (IS_FPA_REGNUM (regno))
22291 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22293 if (IS_VFP_REGNUM (regno))
22295 /* See comment in arm_dwarf_register_span. */
22296 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22297 return 64 + regno - FIRST_VFP_REGNUM;
22299 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22302 if (IS_IWMMXT_GR_REGNUM (regno))
22303 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22305 if (IS_IWMMXT_REGNUM (regno))
22306 return 112 + regno - FIRST_IWMMXT_REGNUM;
22308 gcc_unreachable ();
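/* Two concrete cases of the mapping above: s7, GCC register
   FIRST_VFP_REGNUM + 7, becomes DWARF register 64 + 7 = 71, while d5,
   which GCC models as the pair FIRST_VFP_REGNUM + 10 and + 11, becomes
   256 + 10 / 2 = 261.  */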
22311 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22312 GCC models them as 64 32-bit registers, so we need to describe this to
22313 the DWARF generation code. Other registers can use the default. */
22315 arm_dwarf_register_span (rtx rtl)
22322 regno = REGNO (rtl);
22323 if (!IS_VFP_REGNUM (regno))
22326 /* XXX FIXME: The EABI defines two VFP register ranges:
22327 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22328 256-287: D0-D31
22329 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22330 corresponding D register. Until GDB supports this, we shall use the
22331 legacy encodings. We also use these encodings for D0-D15 for
22332 compatibility with older debuggers. */
22333 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22336 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22337 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22338 regno = (regno - FIRST_VFP_REGNUM) / 2;
22339 for (i = 0; i < nregs; i++)
22340 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22345 #if ARM_UNWIND_INFO
22346 /* Emit unwind directives for a store-multiple instruction or stack pointer
22347 push during alignment.
22348 These should only ever be generated by the function prologue code, so
22349 expect them to have a particular form. */
22352 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22355 HOST_WIDE_INT offset;
22356 HOST_WIDE_INT nregs;
22362 e = XVECEXP (p, 0, 0);
22363 if (GET_CODE (e) != SET)
22366 /* First insn will adjust the stack pointer. */
22367 if (GET_CODE (e) != SET
22368 || GET_CODE (XEXP (e, 0)) != REG
22369 || REGNO (XEXP (e, 0)) != SP_REGNUM
22370 || GET_CODE (XEXP (e, 1)) != PLUS)
22373 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22374 nregs = XVECLEN (p, 0) - 1;
22376 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22379 /* The function prologue may also push pc, but not annotate it as it is
22380 never restored. We turn this into a stack pointer adjustment. */
22381 if (nregs * 4 == offset - 4)
22383 fprintf (asm_out_file, "\t.pad #4\n");
22387 fprintf (asm_out_file, "\t.save {");
22389 else if (IS_VFP_REGNUM (reg))
22392 fprintf (asm_out_file, "\t.vsave {");
22394 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22396 /* FPA registers are done differently. */
22397 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22401 /* Unknown register type. */
22404 /* If the stack increment doesn't match the size of the saved registers,
22405 something has gone horribly wrong. */
22406 if (offset != nregs * reg_size)
22411 /* The remaining insns will describe the stores. */
22412 for (i = 1; i <= nregs; i++)
22414 /* Expect (set (mem <addr>) (reg)).
22415 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22416 e = XVECEXP (p, 0, i);
22417 if (GET_CODE (e) != SET
22418 || GET_CODE (XEXP (e, 0)) != MEM
22419 || GET_CODE (XEXP (e, 1)) != REG)
22422 reg = REGNO (XEXP (e, 1));
22427 fprintf (asm_out_file, ", ");
22428 /* We can't use %r for vfp because we need to use the
22429 double precision register names. */
22430 if (IS_VFP_REGNUM (reg))
22431 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22433 asm_fprintf (asm_out_file, "%r", reg);
22435 #ifdef ENABLE_CHECKING
22436 /* Check that the addresses are consecutive. */
22437 e = XEXP (XEXP (e, 0), 0);
22438 if (GET_CODE (e) == PLUS)
22440 offset += reg_size;
22441 if (GET_CODE (XEXP (e, 0)) != REG
22442 || REGNO (XEXP (e, 0)) != SP_REGNUM
22443 || GET_CODE (XEXP (e, 1)) != CONST_INT
22444 || offset != INTVAL (XEXP (e, 1)))
22448 || GET_CODE (e) != REG
22449 || REGNO (e) != SP_REGNUM)
22453 fprintf (asm_out_file, "}\n");
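/* By way of example: a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}", a VFP save such as "vstmdb sp!, {d8, d9}" as
   ".vsave {d8, d9}", and a push whose pc slot merely pads the stack
   gets an additional ".pad #4" first.  */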
22456 /* Emit unwind directives for a SET. */
22459 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22467 switch (GET_CODE (e0))
22470 /* Pushing a single register. */
22471 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22472 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22473 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22476 asm_fprintf (asm_out_file, "\t.save ");
22477 if (IS_VFP_REGNUM (REGNO (e1)))
22478 asm_fprintf(asm_out_file, "{d%d}\n",
22479 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22481 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22485 if (REGNO (e0) == SP_REGNUM)
22487 /* A stack increment. */
22488 if (GET_CODE (e1) != PLUS
22489 || GET_CODE (XEXP (e1, 0)) != REG
22490 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22491 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22494 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22495 -INTVAL (XEXP (e1, 1)));
22497 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22499 HOST_WIDE_INT offset;
22501 if (GET_CODE (e1) == PLUS)
22503 if (GET_CODE (XEXP (e1, 0)) != REG
22504 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22506 reg = REGNO (XEXP (e1, 0));
22507 offset = INTVAL (XEXP (e1, 1));
22508 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22509 HARD_FRAME_POINTER_REGNUM, reg,
22512 else if (GET_CODE (e1) == REG)
22515 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22516 HARD_FRAME_POINTER_REGNUM, reg);
22521 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22523 /* Move from sp to reg. */
22524 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22526 else if (GET_CODE (e1) == PLUS
22527 && GET_CODE (XEXP (e1, 0)) == REG
22528 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22529 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22531 /* Set reg to offset from sp. */
22532 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22533 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22535 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22537 /* Stack pointer save before alignment. */
22539 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22552 /* Emit unwind directives for the given insn. */
22555 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22559 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22562 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22563 && (TREE_NOTHROW (current_function_decl)
22564 || crtl->all_throwers_are_sibcalls))
22567 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22570 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22572 pat = XEXP (pat, 0);
22574 pat = PATTERN (insn);
22576 switch (GET_CODE (pat))
22579 arm_unwind_emit_set (asm_out_file, pat);
22583 /* Store multiple. */
22584 arm_unwind_emit_sequence (asm_out_file, pat);
22593 /* Output a reference from a function exception table to the type_info
22594 object X. The EABI specifies that the symbol should be relocated by
22595 an R_ARM_TARGET2 relocation. */
22598 arm_output_ttype (rtx x)
22600 fputs ("\t.word\t", asm_out_file);
22601 output_addr_const (asm_out_file, x);
22602 /* Use special relocations for symbol references. */
22603 if (GET_CODE (x) != CONST_INT)
22604 fputs ("(TARGET2)", asm_out_file);
22605 fputc ('\n', asm_out_file);
22610 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22613 arm_asm_emit_except_personality (rtx personality)
22615 fputs ("\t.personality\t", asm_out_file);
22616 output_addr_const (asm_out_file, personality);
22617 fputc ('\n', asm_out_file);
22620 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22623 arm_asm_init_sections (void)
22625 exception_section = get_unnamed_section (0, output_section_asm_op,
22628 #endif /* ARM_UNWIND_INFO */
22630 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22632 static enum unwind_info_type
22633 arm_except_unwind_info (struct gcc_options *opts)
22635 /* Honor the --enable-sjlj-exceptions configure switch. */
22636 #ifdef CONFIG_SJLJ_EXCEPTIONS
22637 if (CONFIG_SJLJ_EXCEPTIONS)
22641 /* If not using ARM EABI unwind tables... */
22642 if (ARM_UNWIND_INFO)
22644 /* For simplicity elsewhere in this file, indicate that all unwind
22645 info is disabled if we're not emitting unwind tables. */
22646 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22652 /* ... we use sjlj exceptions for backwards compatibility. */
22657 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22658 stack alignment. */
22661 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22663 rtx unspec = SET_SRC (pattern);
22664 gcc_assert (GET_CODE (unspec) == UNSPEC);
22668 case UNSPEC_STACK_ALIGN:
22669 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22670 put anything on the stack, so hopefully it won't matter.
22671 CFA = SP will be correct after alignment. */
22672 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22673 SET_DEST (pattern));
22676 gcc_unreachable ();
22681 /* Output unwind directives for the start/end of a function. */
22684 arm_output_fn_unwind (FILE * f, bool prologue)
22686 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22690 fputs ("\t.fnstart\n", f);
22693 /* If this function will never be unwound, then mark it as such.
22694 The same condition is used in arm_unwind_emit to suppress
22695 the frame annotations. */
22696 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22697 && (TREE_NOTHROW (current_function_decl)
22698 || crtl->all_throwers_are_sibcalls))
22699 fputs("\t.cantunwind\n", f);
22701 fputs ("\t.fnend\n", f);
22706 arm_emit_tls_decoration (FILE *fp, rtx x)
22708 enum tls_reloc reloc;
22711 val = XVECEXP (x, 0, 0);
22712 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22714 output_addr_const (fp, val);
22719 fputs ("(tlsgd)", fp);
22722 fputs ("(tlsldm)", fp);
22725 fputs ("(tlsldo)", fp);
22728 fputs ("(gottpoff)", fp);
22731 fputs ("(tpoff)", fp);
22734 gcc_unreachable ();
22742 fputs (" + (. - ", fp);
22743 output_addr_const (fp, XVECEXP (x, 0, 2));
22745 output_addr_const (fp, XVECEXP (x, 0, 3));
22755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22758 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22760 gcc_assert (size == 4);
22761 fputs ("\t.word\t", file);
22762 output_addr_const (file, x);
22763 fputs ("(tlsldo)", file);
22766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22769 arm_output_addr_const_extra (FILE *fp, rtx x)
22771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22772 return arm_emit_tls_decoration (fp, x);
22773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22776 int labelno = INTVAL (XVECEXP (x, 0, 0));
22778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22779 assemble_name_raw (fp, label);
22783 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22785 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22789 output_addr_const (fp, XVECEXP (x, 0, 0));
22793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22795 output_addr_const (fp, XVECEXP (x, 0, 0));
22799 output_addr_const (fp, XVECEXP (x, 0, 1));
22803 else if (GET_CODE (x) == CONST_VECTOR)
22804 return arm_emit_vector_const (fp, x);
22809 /* Output assembly for a shift instruction.
22810 SET_FLAGS determines how the instruction modifies the condition codes.
22811 0 - Do not set condition codes.
22812 1 - Set condition codes.
22813 2 - Use smallest instruction. */
22815 arm_output_shift(rtx * operands, int set_flags)
22818 static const char flag_chars[3] = {'?', '.', '!'};
22823 c = flag_chars[set_flags];
22824 if (TARGET_UNIFIED_ASM)
22826 shift = shift_op(operands[3], &val);
22830 operands[2] = GEN_INT(val);
22831 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22834 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22837 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22838 output_asm_insn (pattern, operands);
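/* For illustration: with unified syntax and a constant logical shift
   left by four, SET_FLAGS == 2 selects the '!' flag character, so the
   pattern built above is "lsl%!\t%0, %1, %2", leaving the output
   machinery free to choose the smallest encoding (a hypothetical case;
   operand numbers follow the shift patterns in arm.md).  */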
22842 /* Output a Thumb-1 casesi dispatch sequence. */
22844 thumb1_output_casesi (rtx *operands)
22846 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22848 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22850 switch (GET_MODE(diff_vec))
22853 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22854 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22856 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22857 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22859 return "bl\t%___gnu_thumb1_case_si";
22861 gcc_unreachable ();
22865 /* Output a Thumb-2 casesi instruction. */
22867 thumb2_output_casesi (rtx *operands)
22869 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22871 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22873 output_asm_insn ("cmp\t%0, %1", operands);
22874 output_asm_insn ("bhi\t%l3", operands);
22875 switch (GET_MODE(diff_vec))
22878 return "tbb\t[%|pc, %0]";
22880 return "tbh\t[%|pc, %0, lsl #1]";
22884 output_asm_insn ("adr\t%4, %l2", operands);
22885 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22886 output_asm_insn ("add\t%4, %4, %5", operands);
22891 output_asm_insn ("adr\t%4, %l2", operands);
22892 return "ldr\t%|pc, [%4, %0, lsl #2]";
22895 gcc_unreachable ();
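/* For a QImode dispatch table the sequence above is simply

	cmp	r0, r1
	bhi	.Ldefault
	tbb	[pc, r0]

   (operands substituted for illustration); tbb itself doubles the byte
   table entry and adds it to the pc.  */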
22899 /* Most ARM cores are single issue, but some newer ones can dual issue.
22900 The scheduler descriptions rely on this being correct. */
22902 arm_issue_rate (void)
22919 /* A table and a function to perform ARM-specific name mangling for
22920 NEON vector types in order to conform to the AAPCS (see "Procedure
22921 Call Standard for the ARM Architecture", Appendix A). To qualify
22922 for emission with the mangled names defined in that document, a
22923 vector type must not only be of the correct mode but also be
22924 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22927 enum machine_mode mode;
22928 const char *element_type_name;
22929 const char *aapcs_name;
22930 } arm_mangle_map_entry;
22932 static arm_mangle_map_entry arm_mangle_map[] = {
22933 /* 64-bit containerized types. */
22934 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22935 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22936 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22937 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22938 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22939 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22940 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22941 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22942 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22943 /* 128-bit containerized types. */
22944 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22945 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22946 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22947 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22948 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22949 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22950 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22951 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22952 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22953 { VOIDmode, NULL, NULL }
22957 arm_mangle_type (const_tree type)
22959 arm_mangle_map_entry *pos = arm_mangle_map;
22961 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22962 has to be mangled as if it is in the "std" namespace. */
22963 if (TARGET_AAPCS_BASED
22964 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22966 static bool warned;
22967 if (!warned && warn_psabi && !in_system_header)
22970 inform (input_location,
22971 "the mangling of %<va_list%> has changed in GCC 4.4");
22973 return "St9__va_list";
22976 /* Half-precision float. */
22977 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22980 if (TREE_CODE (type) != VECTOR_TYPE)
22983 /* Check the mode of the vector type, and the name of the vector
22984 element type, against the table. */
22985 while (pos->mode != VOIDmode)
22987 tree elt_type = TREE_TYPE (type);
22989 if (pos->mode == TYPE_MODE (type)
22990 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22991 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22992 pos->element_type_name))
22993 return pos->aapcs_name;
22998 /* Use the default mangling for unrecognized (possibly user-defined)
22999 vector types. */
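/* A worked example of the table lookup above, assuming the arm_neon.h
   types: float32x4_t has mode V4SFmode and element type
   __builtin_neon_sf, so "void f (float32x4_t);" mangles as
   _Z1f19__simd128_float32_t rather than with the generic vector
   mangling.  */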
23003 /* Order of allocation of core registers for Thumb: this allocation is
23004 written over the corresponding initial entries of the array
23005 initialized with REG_ALLOC_ORDER. We allocate all low registers
23006 first. Saving and restoring a low register is usually cheaper than
23007 using a call-clobbered high register. */
23009 static const int thumb_core_reg_alloc_order[] =
23011 3, 2, 1, 0, 4, 5, 6, 7,
23012 14, 12, 8, 9, 10, 11, 13, 15
23015 /* Adjust register allocation order when compiling for Thumb. */
23018 arm_order_regs_for_local_alloc (void)
23020 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23021 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23023 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23024 sizeof (thumb_core_reg_alloc_order));
23027 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23030 arm_frame_pointer_required (void)
23032 return (cfun->has_nonlocal_label
23033 || SUBTARGET_FRAME_POINTER_REQUIRED
23034 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23037 /* Only Thumb-1 lacks conditional execution, so return true if
23038 the target is not thumb1. */
23040 arm_have_conditional_execution (void)
23042 return !TARGET_THUMB1;
23045 /* Legitimize a memory reference for sync primitive implemented using
23046 ldrex / strex. We currently force the form of the reference to be
23047 indirect without offset. We do not yet support the indirect offset
23048 addressing supported by some ARM targets for these
23049 instructions. */
23051 arm_legitimize_sync_memory (rtx memory)
23053 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23054 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23056 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23057 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23058 return legitimate_memory;
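/* For example, a reference such as (mem:SI (plus:SI (reg:SI r4)
   (const_int 8))) is rewritten by first forcing the address sum into a
   fresh pseudo, yielding a bare (mem:SI (reg:SI <pseudo>)) that
   ldrex/strex accept directly (register choice illustrative).  */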
23061 /* An instruction emitter. */
23062 typedef void (* emit_f) (int label, const char *, rtx *);
23064 /* An instruction emitter that emits via the conventional
23065 output_asm_insn. */
23067 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23069 output_asm_insn (pattern, operands);
23072 /* Count the number of emitted synchronization instructions. */
23073 static unsigned arm_insn_count;
23075 /* An emitter that counts emitted instructions but does not actually
23076 emit instructions into the instruction stream. */
23078 arm_count (int label,
23079 const char *pattern ATTRIBUTE_UNUSED,
23080 rtx *operands ATTRIBUTE_UNUSED)
23086 /* Construct a pattern using conventional output formatting and feed
23087 it to output_asm_insn. Provides a mechanism to construct the
23088 output pattern on the fly. Note the hard limit on the pattern
23090 static void ATTRIBUTE_PRINTF_4
23091 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23092 const char *pattern, ...)
23097 va_start (ap, pattern);
23098 vsprintf (buffer, pattern, ap);
23100 emit (label, buffer, operands);
23103 /* Emit the memory barrier instruction, if any, provided by this
23104 target to a specified emitter. */
23106 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23108 if (TARGET_HAVE_DMB)
23110 /* Note we issue a system level barrier. We should consider
23111 issuing an inner shareability zone barrier here instead, i.e.
23112 "DMB ISH". */
23113 emit (0, "dmb\tsy", operands);
23117 if (TARGET_HAVE_DMB_MCR)
23119 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23123 gcc_unreachable ();
23126 /* Emit the memory barrier instruction, if any, provided by this
23129 arm_output_memory_barrier (rtx *operands)
23131 arm_process_output_memory_barrier (arm_emit, operands);
23135 /* Helper to figure out the instruction suffix required on ldrex/strex
23136 for operations on an object of the specified mode. */
23137 static const char *
23138 arm_ldrex_suffix (enum machine_mode mode)
23142 case QImode: return "b";
23143 case HImode: return "h";
23144 case SImode: return "";
23145 case DImode: return "d";
23147 gcc_unreachable ();
23152 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23153 memory MODE. */
23155 arm_output_ldrex (emit_f emit,
23156 enum machine_mode mode,
23160 const char *suffix = arm_ldrex_suffix (mode);
23163 operands[0] = target;
23164 operands[1] = memory;
23165 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23168 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23169 memory MODE. */
23171 arm_output_strex (emit_f emit,
23172 enum machine_mode mode,
23178 const char *suffix = arm_ldrex_suffix (mode);
23181 operands[0] = result;
23182 operands[1] = value;
23183 operands[2] = memory;
23184 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23188 /* Helper to emit a two operand instruction. */
23190 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23196 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23199 /* Helper to emit a three operand instruction. */
23201 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23208 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23211 /* Emit a load store exclusive synchronization loop.
23213 do
23214 old_value = [mem]
23215 if old_value != required_value
23216 break;
23217 t1 = sync_op (old_value, new_value)
23218 [mem] = t1, t2 = [0|1]
23219 while ! t2
23221 Note:
23222 t1 == t2 is not permitted
23223 t1 == old_value is permitted
23225 required_value:
23227 RTX register or const_int representing the required old_value for
23228 the modify to continue, if NULL no comparison is performed. */
23230 arm_output_sync_loop (emit_f emit,
23231 enum machine_mode mode,
23234 rtx required_value,
23238 enum attr_sync_op sync_op,
23239 int early_barrier_required)
23243 gcc_assert (t1 != t2);
23245 if (early_barrier_required)
23246 arm_process_output_memory_barrier (emit, NULL);
23248 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23250 arm_output_ldrex (emit, mode, old_value, memory);
23252 if (required_value)
23256 operands[0] = old_value;
23257 operands[1] = required_value;
23258 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23259 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23265 arm_output_op3 (emit, "add", t1, old_value, new_value);
23269 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23273 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23277 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23281 arm_output_op3 (emit,"and", t1, old_value, new_value);
23285 arm_output_op3 (emit, "and", t1, old_value, new_value);
23286 arm_output_op2 (emit, "mvn", t1, t1);
23296 arm_output_strex (emit, mode, "", t2, t1, memory);
23298 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23299 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23300 LOCAL_LABEL_PREFIX);
23304 /* Use old_value for the return value because for some operations
23305 the old_value can easily be restored. This saves one register. */
23306 arm_output_strex (emit, mode, "", old_value, t1, memory);
23307 operands[0] = old_value;
23308 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23309 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23310 LOCAL_LABEL_PREFIX);
23315 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23319 arm_output_op3 (emit, "add", old_value, t1, new_value);
23323 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23327 arm_output_op2 (emit, "mov", old_value, required_value);
23331 gcc_unreachable ();
23335 arm_process_output_memory_barrier (emit, NULL);
23336 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
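/* As an illustration (labels and registers invented), an SImode atomic
   add with no required value expands to roughly:

	dmb	sy
   .LSYT8:
	ldrex	r0, [r2]
	add	r1, r0, r3
	strex	r4, r1, [r2]
	teq	r4, #0
	bne	.LSYT8
	dmb	sy

   When a required value is given, a cmp/bne to the .LSYB label follows
   the ldrex so that a failed compare skips the store.  */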
23340 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23343 default_value = operands[index - 1];
23345 return default_value;
23348 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23349 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23351 /* Extract the operands for a synchronization instruction from the
23352 instruction's attributes and emit the instruction. */
23354 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23356 rtx result, memory, required_value, new_value, t1, t2;
23358 enum machine_mode mode;
23359 enum attr_sync_op sync_op;
23361 result = FETCH_SYNC_OPERAND(result, 0);
23362 memory = FETCH_SYNC_OPERAND(memory, 0);
23363 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23364 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23365 t1 = FETCH_SYNC_OPERAND(t1, 0);
23366 t2 = FETCH_SYNC_OPERAND(t2, 0);
23368 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23369 sync_op = get_attr_sync_op (insn);
23370 mode = GET_MODE (memory);
23372 arm_output_sync_loop (emit, mode, result, memory, required_value,
23373 new_value, t1, t2, sync_op, early_barrier);
23376 /* Emit a synchronization instruction loop. */
23378 arm_output_sync_insn (rtx insn, rtx *operands)
23380 arm_process_output_sync_insn (arm_emit, insn, operands);
23384 /* Count the number of machine instructions that will be emitted for a
23385 synchronization instruction. Note that the emitter used does not
23386 emit instructions, it just counts instructions, being careful not
23387 to count labels. */
23389 arm_sync_loop_insns (rtx insn, rtx *operands)
23391 arm_insn_count = 0;
23392 arm_process_output_sync_insn (arm_count, insn, operands);
23393 return arm_insn_count;
23396 /* Helper to call a target sync instruction generator, dealing with
23397 the variation in operands required by the different generators. */
23399 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23400 rtx memory, rtx required_value, rtx new_value)
23402 switch (generator->op)
23404 case arm_sync_generator_omn:
23405 gcc_assert (! required_value);
23406 return generator->u.omn (old_value, memory, new_value);
23408 case arm_sync_generator_omrn:
23409 gcc_assert (required_value);
23410 return generator->u.omrn (old_value, memory, required_value, new_value);
23416 /* Expand a synchronization loop. The synchronization loop is expanded
23417 as an opaque block of instructions in order to ensure that we do
23418 not subsequently get extraneous memory accesses inserted within the
23419 critical region. The exclusive access property of ldrex/strex is
23420 only guaranteed if there are no intervening memory accesses. */
23422 arm_expand_sync (enum machine_mode mode,
23423 struct arm_sync_generator *generator,
23424 rtx target, rtx memory, rtx required_value, rtx new_value)
23426 if (target == NULL)
23427 target = gen_reg_rtx (mode);
23429 memory = arm_legitimize_sync_memory (memory);
23430 if (mode != SImode)
23432 rtx load_temp = gen_reg_rtx (SImode);
23434 if (required_value)
23435 required_value = convert_modes (SImode, mode, required_value, true);
23437 new_value = convert_modes (SImode, mode, new_value, true);
23438 emit_insn (arm_call_generator (generator, load_temp, memory,
23439 required_value, new_value));
23440 emit_move_insn (target, gen_lowpart (mode, load_temp));
23444 emit_insn (arm_call_generator (generator, target, memory, required_value,
23450 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23452 /* Vectors which aren't in packed structures will not be less aligned than
23453 the natural alignment of their element type, so this is safe. */
23454 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23457 return default_builtin_vector_alignment_reachable (type, is_packed);
23461 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23462 const_tree type, int misalignment,
23465 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23467 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23472 /* If the misalignment is unknown, we should be able to handle the access
23473 so long as it is not to a member of a packed data structure. */
23474 if (misalignment == -1)
23477 /* Return true if the misalignment is a multiple of the natural alignment
23478 of the vector's element type. This is probably always going to be
23479 true in practice, since we've already established that this isn't a
23480 packed access. */
23481 return ((misalignment % align) == 0);
23484 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23489 arm_conditional_register_usage (void)
23493 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23495 for (regno = FIRST_FPA_REGNUM;
23496 regno <= LAST_FPA_REGNUM; ++regno)
23497 fixed_regs[regno] = call_used_regs[regno] = 1;
23500 if (TARGET_THUMB1 && optimize_size)
23502 /* When optimizing for size on Thumb-1, it's better not
23503 to use the HI regs, because of the overhead of
23504 stacking them. */
23505 for (regno = FIRST_HI_REGNUM;
23506 regno <= LAST_HI_REGNUM; ++regno)
23507 fixed_regs[regno] = call_used_regs[regno] = 1;
23510 /* The link register can be clobbered by any branch insn,
23511 but we have no way to track that at present, so mark
23512 it as unavailable. */
23514 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23516 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23518 if (TARGET_MAVERICK)
23520 for (regno = FIRST_FPA_REGNUM;
23521 regno <= LAST_FPA_REGNUM; ++ regno)
23522 fixed_regs[regno] = call_used_regs[regno] = 1;
23523 for (regno = FIRST_CIRRUS_FP_REGNUM;
23524 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23526 fixed_regs[regno] = 0;
23527 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23532 /* VFPv3 registers are disabled when earlier VFP
23533 versions are selected due to the definition of
23534 LAST_VFP_REGNUM. */
23535 for (regno = FIRST_VFP_REGNUM;
23536 regno <= LAST_VFP_REGNUM; ++ regno)
23538 fixed_regs[regno] = 0;
23539 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23540 || regno >= FIRST_VFP_REGNUM + 32;
23545 if (TARGET_REALLY_IWMMXT)
23547 regno = FIRST_IWMMXT_GR_REGNUM;
23548 /* The 2002/10/09 revision of the XScale ABI has wCG0
23549 and wCG1 as call-preserved registers. The 2002/11/21
23550 revision changed this so that all wCG registers are
23551 scratch registers. */
23552 for (regno = FIRST_IWMMXT_GR_REGNUM;
23553 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23554 fixed_regs[regno] = 0;
23555 /* The XScale ABI has wR0 - wR9 as scratch registers,
23556 the rest as call-preserved registers. */
23557 for (regno = FIRST_IWMMXT_REGNUM;
23558 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23560 fixed_regs[regno] = 0;
23561 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23565 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23567 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23568 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23570 else if (TARGET_APCS_STACK)
23572 fixed_regs[10] = 1;
23573 call_used_regs[10] = 1;
23575 /* -mcaller-super-interworking reserves r11 for calls to
23576 _interwork_r11_call_via_rN(). Making the register global
23577 is an easy way of ensuring that it remains valid for all
23578 calls. */
23579 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23580 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23582 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23583 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23584 if (TARGET_CALLER_INTERWORKING)
23585 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23587 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23591 arm_preferred_rename_class (reg_class_t rclass)
23593 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23594 using GENERAL_REGS. During register rename pass, we prefer LO_REGS,
23595 and code size can be reduced. */
23596 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23602 #include "gt-arm.h"