/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "libfuncs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help
#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef  TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef  TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
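/* (Offsets -4088 .. -1 span 4088 bytes, the anchor itself accounts for
   one more, and offsets 1 .. 4095 span 4095 bytes:
   4088 + 1 + 4095 = 8184 = 8 * 1023.)  */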
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

struct gcc_target targetm = TARGET_INITIALIZER;
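/* TARGET_INITIALIZER, supplied by target-def.h, expands to an aggregate
   initializer covering every hook in struct gcc_target: each
   #undef/#define pair above substitutes the ARM-specific implementation
   for the documented default, and any hook left untouched keeps its
   default behaviour.  */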
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
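/* These definitions chain cumulatively; for example, FL_FOR_ARCH5TE
   unfolds to FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5
   | FL_ARCH5E | FL_THUMB.  */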
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)
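/* A mask of the low registers (r0-r7, the 0xff part) usable as work
   registers, minus the Thumb hard frame pointer and, defensively, sp,
   pc and the PIC register (whose numbers fall outside the low eight
   anyway).  */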
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,                         /* Sched adjust cost.  */
  3,                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,                         /* Sched adjust cost.  */
  1,                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,                         /* Sched adjust cost.  */
  1,                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                            /* Constant limit.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
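  /* For instance, an arm-cores.def entry along the lines of
     ARM_CORE ("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
     would expand to
     {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      &arm_9e_tune},.  */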
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",           ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",           ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",            ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};
struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};
struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
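/* For example, bit_count (0x16) performs three iterations,
   10110 -> 10100 -> 10000 -> 0, and returns 3.  */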
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
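  /* For example, a call to __aeabi_idivmod leaves the quotient in r0
     and the remainder in r1; when used as a plain division routine the
     caller simply ignores r1.  */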
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
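/* Note that under the EABI C++ ABI this structure is mangled as
   "St9__va_list", i.e. std::__va_list; arm_mangle_type (installed as
   TARGET_MANGLE_TYPE above) produces that name, which is why the tag
   chosen here is ABI-significant.  */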
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
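/* In C terms, the COMPONENT_REF built above rewrites VALIST into
   VALIST.__ap on AAPCS targets; on other targets the va_list object
   already is the pointer, so it is returned unchanged.  */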
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */

static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    if (streq (name, sel->name))
      return sel;

  error ("bad value (%s) for %s switch", name, desc);

  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
/* Implement TARGET_HELP: print the CPU and architecture names accepted
   by the -mcpu=/-mtune= and -march= options.  */
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      p = getenv ("COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }
      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1690 /* If we are not using the default (ARM mode) section anchor offset
1691 ranges, then set the correct ranges now. */
1694 /* Thumb-1 LDR instructions cannot have negative offsets.
1695 Permissible positive offset ranges are 5-bit (for byte loads),
1696 6-bit (for halfword loads), or 7-bit (for word loads).
1697 Empirical results suggest a 7-bit anchor range gives the best
1698 overall code size. */
1699 targetm.min_anchor_offset = 0;
1700 targetm.max_anchor_offset = 127;
1702 else if (TARGET_THUMB2)
1704 /* The minimum is set such that the total size of the block
1705 for a particular anchor is 248 + 1 + 4095 bytes, which is
1706 divisible by eight, ensuring natural spacing of anchors. */
1707 targetm.min_anchor_offset = -248;
1708 targetm.max_anchor_offset = 4095;
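/* A quick standalone check of the arithmetic claimed in the comment
   above (illustration only, not part of this file's logic):
   248 + 1 + 4095 = 4344 bytes per anchor block, and 4344 = 8 * 543. */
#include <assert.h>
int main (void)
{
  assert ((248 + 1 + 4095) % 8 == 0);
  return 0;
}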
1711 /* V5 code we generate is completely interworking capable, so we turn off
1712 TARGET_INTERWORK here to avoid many tests later on. */
1714 /* XXX However, we must pass the right pre-processor defines to CPP
1715 or GLD can get confused. This is a hack. */
1716 if (TARGET_INTERWORK)
1717 arm_cpp_interwork = 1;
1719 if (arm_arch5)
1720 target_flags &= ~MASK_INTERWORK;
1722 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1723 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1725 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1726 error ("iwmmxt abi requires an iwmmxt capable cpu");
1728 if (target_fpu_name == NULL && target_fpe_name != NULL)
1730 if (streq (target_fpe_name, "2"))
1731 target_fpu_name = "fpe2";
1732 else if (streq (target_fpe_name, "3"))
1733 target_fpu_name = "fpe3";
1734 else
1735 error ("invalid floating point emulation option: -mfpe=%s",
1736 target_fpe_name);
1739 if (target_fpu_name == NULL)
1741 #ifdef FPUTYPE_DEFAULT
1742 target_fpu_name = FPUTYPE_DEFAULT;
1743 #else
1744 if (arm_arch_cirrus)
1745 target_fpu_name = "maverick";
1746 else
1747 target_fpu_name = "fpe2";
1748 #endif
1751 arm_fpu_desc = NULL;
1752 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1754 if (streq (all_fpus[i].name, target_fpu_name))
1756 arm_fpu_desc = &all_fpus[i];
1761 if (!arm_fpu_desc)
1763 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1767 switch (arm_fpu_desc->model)
1769 case ARM_FP_MODEL_FPA:
1770 if (arm_fpu_desc->rev == 2)
1771 arm_fpu_attr = FPU_FPE2;
1772 else if (arm_fpu_desc->rev == 3)
1773 arm_fpu_attr = FPU_FPE3;
1774 else
1775 arm_fpu_attr = FPU_FPA;
1778 case ARM_FP_MODEL_MAVERICK:
1779 arm_fpu_attr = FPU_MAVERICK;
1782 case ARM_FP_MODEL_VFP:
1783 arm_fpu_attr = FPU_VFP;
1790 if (target_float_abi_name != NULL)
1792 /* The user specified a FP ABI. */
1793 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1795 if (streq (all_float_abis[i].name, target_float_abi_name))
1797 arm_float_abi = all_float_abis[i].abi_type;
1801 if (i == ARRAY_SIZE (all_float_abis))
1802 error ("invalid floating point abi: -mfloat-abi=%s",
1803 target_float_abi_name);
1805 else
1806 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1808 if (TARGET_AAPCS_BASED
1809 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1810 error ("FPA is unsupported in the AAPCS");
1812 if (TARGET_AAPCS_BASED)
1814 if (TARGET_CALLER_INTERWORKING)
1815 error ("AAPCS does not support -mcaller-super-interworking");
1817 if (TARGET_CALLEE_INTERWORKING)
1818 error ("AAPCS does not support -mcallee-super-interworking");
1821 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1822 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1823 will ever exist. GCC makes no attempt to support this combination. */
1824 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1825 sorry ("iWMMXt and hardware floating point");
1827 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1828 if (TARGET_THUMB2 && TARGET_IWMMXT)
1829 sorry ("Thumb-2 iWMMXt");
1831 /* __fp16 support currently assumes the core has ldrh. */
1832 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1833 sorry ("__fp16 and no ldrh");
1835 /* If soft-float is specified then don't use FPU. */
1836 if (TARGET_SOFT_FLOAT)
1837 arm_fpu_attr = FPU_NONE;
1839 if (TARGET_AAPCS_BASED)
1841 if (arm_abi == ARM_ABI_IWMMXT)
1842 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1843 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1844 && TARGET_HARD_FLOAT
1845 && TARGET_VFP)
1846 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1847 else
1848 arm_pcs_default = ARM_PCS_AAPCS;
1852 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1853 sorry ("-mfloat-abi=hard and VFP");
1855 if (arm_abi == ARM_ABI_APCS)
1856 arm_pcs_default = ARM_PCS_APCS;
1857 else
1858 arm_pcs_default = ARM_PCS_ATPCS;
1861 /* For arm2/3 there is no need to do any scheduling if there is only
1862 a floating point emulator, or we are doing software floating-point. */
1863 if ((TARGET_SOFT_FLOAT
1864 || (TARGET_FPA && arm_fpu_desc->rev))
1865 && (tune_flags & FL_MODE32) == 0)
1866 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1868 if (target_thread_switch)
1870 if (strcmp (target_thread_switch, "soft") == 0)
1871 target_thread_pointer = TP_SOFT;
1872 else if (strcmp (target_thread_switch, "auto") == 0)
1873 target_thread_pointer = TP_AUTO;
1874 else if (strcmp (target_thread_switch, "cp15") == 0)
1875 target_thread_pointer = TP_CP15;
1877 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1880 /* Use the cp15 method if it is available. */
1881 if (target_thread_pointer == TP_AUTO)
1883 if (arm_arch6k && !TARGET_THUMB1)
1884 target_thread_pointer = TP_CP15;
1885 else
1886 target_thread_pointer = TP_SOFT;
1889 if (TARGET_HARD_TP && TARGET_THUMB1)
1890 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1892 /* Override the default structure alignment for AAPCS ABI. */
1893 if (TARGET_AAPCS_BASED)
1894 arm_structure_size_boundary = 8;
1896 if (structure_size_string != NULL)
1898 int size = strtol (structure_size_string, NULL, 0);
1900 if (size == 8 || size == 32
1901 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1902 arm_structure_size_boundary = size;
1904 warning (0, "structure size boundary can only be set to %s",
1905 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1908 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1910 error ("RTP PIC is incompatible with Thumb");
1914 /* If stack checking is disabled, we can use r10 as the PIC register,
1915 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1916 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1918 if (TARGET_VXWORKS_RTP)
1919 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1920 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1923 if (flag_pic && TARGET_VXWORKS_RTP)
1924 arm_pic_register = 9;
1926 if (arm_pic_register_string != NULL)
1928 int pic_register = decode_reg_name (arm_pic_register_string);
1930 if (!flag_pic)
1931 warning (0, "-mpic-register= is useless without -fpic");
1933 /* Prevent the user from choosing an obviously stupid PIC register. */
1934 else if (pic_register < 0 || call_used_regs[pic_register]
1935 || pic_register == HARD_FRAME_POINTER_REGNUM
1936 || pic_register == STACK_POINTER_REGNUM
1937 || pic_register >= PC_REGNUM
1938 || (TARGET_VXWORKS_RTP
1939 && (unsigned int) pic_register != arm_pic_register))
1940 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1941 else
1942 arm_pic_register = pic_register;
1945 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1946 if (fix_cm3_ldrd == 2)
1948 if (arm_selected_cpu->core == cortexm3)
1954 if (TARGET_THUMB1 && flag_schedule_insns)
1956 /* Don't warn since it's on by default in -O2. */
1957 flag_schedule_insns = 0;
1962 /* If optimizing for size, bump the number of instructions that we
1963 are prepared to conditionally execute (even on a StrongARM). */
1964 max_insns_skipped = 6;
1968 /* StrongARM has early execution of branches, so a sequence
1969 that is worth skipping is shorter. */
1970 if (arm_tune_strongarm)
1971 max_insns_skipped = 3;
1974 /* Hot/Cold partitioning is not currently supported, since we can't
1975 handle literal pool placement in that case. */
1976 if (flag_reorder_blocks_and_partition)
1978 inform (input_location,
1979 "-freorder-blocks-and-partition not supported on this architecture");
1980 flag_reorder_blocks_and_partition = 0;
1981 flag_reorder_blocks = 1;
1985 /* Hoisting PIC address calculations more aggressively provides a small,
1986 but measurable, size reduction for PIC code. Therefore, we decrease
1987 the bar for unrestricted expression hoisting to the cost of PIC address
1988 calculation, which is 2 instructions. */
1989 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1990 global_options.x_param_values,
1991 global_options_set.x_param_values);
1993 /* ARM EABI defaults to strict volatile bitfields. */
1994 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1995 flag_strict_volatile_bitfields = 1;
1997 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
1998 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1999 if (flag_prefetch_loop_arrays < 0
2000 && HAVE_prefetch
2001 && optimize >= 3
2002 && current_tune->num_prefetch_slots > 0)
2003 flag_prefetch_loop_arrays = 1;
2005 /* Set up parameters to be used in prefetching algorithm. Do not override the
2006 defaults unless we are tuning for a core we have researched values for. */
2007 if (current_tune->num_prefetch_slots > 0)
2008 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2009 current_tune->num_prefetch_slots,
2010 global_options.x_param_values,
2011 global_options_set.x_param_values);
2012 if (current_tune->l1_cache_line_size >= 0)
2013 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2014 current_tune->l1_cache_line_size,
2015 global_options.x_param_values,
2016 global_options_set.x_param_values);
2017 if (current_tune->l1_cache_size >= 0)
2018 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2019 current_tune->l1_cache_size,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2023 /* Register global variables with the garbage collector. */
2024 arm_add_gc_roots ();
2028 arm_add_gc_roots (void)
2030 gcc_obstack_init(&minipool_obstack);
2031 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
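/* For illustration, a minimal standalone use of the GNU obstack
   pattern above (sketch; assumes glibc's <obstack.h>, and the chunk
   macros are required glue). Allocating a zero-length object first
   records a base pointer, so a single obstack_free later releases
   everything allocated after it. */
#include <obstack.h>
#include <stdlib.h>
#define obstack_chunk_alloc malloc  /* sketch only; real code checks for NULL */
#define obstack_chunk_free free
static struct obstack pool;
static char *pool_start;
static void
pool_init (void)
{
  obstack_init (&pool);
  pool_start = (char *) obstack_alloc (&pool, 0);
}
static void
pool_reset (void)
{
  obstack_free (&pool, pool_start);  /* frees everything since pool_start */
}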
2034 /* A table of known ARM exception types.
2035 For use with the interrupt function attribute. */
2037 typedef struct
2038 {
2039 const char *const arg;
2040 const unsigned long return_value;
2041 } isr_attribute_arg;
2044 static const isr_attribute_arg isr_attribute_args [] =
2046 { "IRQ", ARM_FT_ISR },
2047 { "irq", ARM_FT_ISR },
2048 { "FIQ", ARM_FT_FIQ },
2049 { "fiq", ARM_FT_FIQ },
2050 { "ABORT", ARM_FT_ISR },
2051 { "abort", ARM_FT_ISR },
2052 { "ABORT", ARM_FT_ISR },
2053 { "abort", ARM_FT_ISR },
2054 { "UNDEF", ARM_FT_EXCEPTION },
2055 { "undef", ARM_FT_EXCEPTION },
2056 { "SWI", ARM_FT_EXCEPTION },
2057 { "swi", ARM_FT_EXCEPTION },
2058 { NULL, ARM_FT_NORMAL }
2061 /* Returns the (interrupt) function type of the current
2062 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2064 static unsigned long
2065 arm_isr_value (tree argument)
2067 const isr_attribute_arg * ptr;
2068 const char * arg;
2071 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2073 /* No argument - default to IRQ. */
2074 if (argument == NULL_TREE)
2075 return ARM_FT_ISR;
2077 /* Get the value of the argument. */
2078 if (TREE_VALUE (argument) == NULL_TREE
2079 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2080 return ARM_FT_UNKNOWN;
2082 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2084 /* Check it against the list of known arguments. */
2085 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2086 if (streq (arg, ptr->arg))
2087 return ptr->return_value;
2089 /* An unrecognized interrupt type. */
2090 return ARM_FT_UNKNOWN;
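/* Standalone sketch of the sentinel-terminated lookup pattern used by
   arm_isr_value above (names and values here are illustrative only). */
#include <string.h>
struct name_value { const char *name; unsigned long value; };
static unsigned long
table_lookup (const struct name_value *tab, const char *arg,
              unsigned long not_found)
{
  for (; tab->name != NULL; tab++)     /* NULL name terminates the table */
    if (strcmp (arg, tab->name) == 0)
      return tab->value;
  return not_found;
}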
2093 /* Computes the type of the current function. */
2095 static unsigned long
2096 arm_compute_func_type (void)
2098 unsigned long type = ARM_FT_UNKNOWN;
2102 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2104 /* Decide if the current function is volatile. Such functions
2105 never return, and many memory cycles can be saved by not storing
2106 register values that will never be needed again. This optimization
2107 was added to speed up context switching in a kernel application. */
2109 && (TREE_NOTHROW (current_function_decl)
2110 || !(flag_unwind_tables
2112 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2113 && TREE_THIS_VOLATILE (current_function_decl))
2114 type |= ARM_FT_VOLATILE;
2116 if (cfun->static_chain_decl != NULL)
2117 type |= ARM_FT_NESTED;
2119 attr = DECL_ATTRIBUTES (current_function_decl);
2121 a = lookup_attribute ("naked", attr);
2122 if (a != NULL_TREE)
2123 type |= ARM_FT_NAKED;
2125 a = lookup_attribute ("isr", attr);
2126 if (a == NULL_TREE)
2127 a = lookup_attribute ("interrupt", attr);
2129 if (a == NULL_TREE)
2130 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2131 else
2132 type |= arm_isr_value (TREE_VALUE (a));
2137 /* Returns the type of the current function. */
2140 arm_current_func_type (void)
2142 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2143 cfun->machine->func_type = arm_compute_func_type ();
2145 return cfun->machine->func_type;
2149 arm_allocate_stack_slots_for_args (void)
2151 /* Naked functions should not allocate stack slots for arguments. */
2152 return !IS_NAKED (arm_current_func_type ());
2156 /* Output assembler code for a block containing the constant parts
2157 of a trampoline, leaving space for the variable parts.
2159 On the ARM, (if r8 is the static chain regnum, and remembering that
2160 referencing pc adds an offset of 8) the trampoline looks like:
2163 .word static chain value
2164 .word function's address
2165 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2168 arm_asm_trampoline_template (FILE *f)
2170 if (TARGET_ARM)
2172 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2173 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2175 else if (TARGET_THUMB2)
2177 /* The Thumb-2 trampoline is similar to the ARM implementation.
2178 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2179 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2180 STATIC_CHAIN_REGNUM, PC_REGNUM);
2181 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2185 ASM_OUTPUT_ALIGN (f, 2);
2186 fprintf (f, "\t.code\t16\n");
2187 fprintf (f, ".Ltrampoline_start:\n");
2188 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2189 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2190 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2191 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2192 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2193 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2195 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2196 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
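/* The ARM-mode image laid out above, shown as a memory map (sketch;
   the struct and field names are mine). A pc read observes the fetch
   address plus 8, so "ldr rX, [pc, #0]" in insn[0] loads chain, and
   the same encoding in insn[1] loads target; arm_trampoline_init
   below patches the two data words at offsets 8 and 12. */
#include <stdint.h>
struct arm_trampoline_image
{
  uint32_t insn[2];  /* ldr STATIC_CHAIN, [pc, #0]; ldr pc, [pc, #0] */
  uint32_t chain;    /* offset 8: static chain value */
  uint32_t target;   /* offset 12: nested function's address */
};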
2199 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2202 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2204 rtx fnaddr, mem, a_tramp;
2206 emit_block_move (m_tramp, assemble_trampoline_template (),
2207 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2209 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2210 emit_move_insn (mem, chain_value);
2212 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2213 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2214 emit_move_insn (mem, fnaddr);
2216 a_tramp = XEXP (m_tramp, 0);
2217 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2218 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2219 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2222 /* Thumb trampolines should be entered in thumb mode, so set
2223 the bottom bit of the address. */
2226 arm_trampoline_adjust_address (rtx addr)
2229 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2230 NULL, 0, OPTAB_LIB_WIDEN);
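/* Sketch of the rule above: interworking transfers use bit 0 of the
   target address as the instruction-set state bit, so a Thumb entry
   point is simply the address with bit 0 set. */
#include <stdint.h>
static uintptr_t
thumb_entry_point (uintptr_t addr)
{
  return addr | 1;  /* odd address => enter in Thumb state */
}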
2234 /* Return 1 if it is possible to return using a single instruction.
2235 If SIBLING is non-null, this is a test for a return before a sibling
2236 call. SIBLING is the call insn, so we can examine its register usage. */
2239 use_return_insn (int iscond, rtx sibling)
2242 unsigned int func_type;
2243 unsigned long saved_int_regs;
2244 unsigned HOST_WIDE_INT stack_adjust;
2245 arm_stack_offsets *offsets;
2247 /* Never use a return instruction before reload has run. */
2248 if (!reload_completed)
2251 func_type = arm_current_func_type ();
2253 /* Naked, volatile and stack alignment functions need special consideration. */
2255 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2258 /* So do interrupt functions that use the frame pointer and Thumb
2259 interrupt functions. */
2260 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2263 offsets = arm_get_frame_offsets ();
2264 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2266 /* As do variadic functions. */
2267 if (crtl->args.pretend_args_size
2268 || cfun->machine->uses_anonymous_args
2269 /* Or if the function calls __builtin_eh_return () */
2270 || crtl->calls_eh_return
2271 /* Or if the function calls alloca */
2272 || cfun->calls_alloca
2273 /* Or if there is a stack adjustment. However, if the stack pointer
2274 is saved on the stack, we can use a pre-incrementing stack load. */
2275 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2276 && stack_adjust == 4)))
2279 saved_int_regs = offsets->saved_regs_mask;
2281 /* Unfortunately, the insn
2283 ldmib sp, {..., sp, ...}
2285 triggers a bug on most SA-110 based devices, such that the stack
2286 pointer won't be correctly restored if the instruction takes a
2287 page fault. We work around this problem by popping r3 along with
2288 the other registers, since that is never slower than executing
2289 another instruction.
2291 We test for !arm_arch5 here, because code for any architecture
2292 less than this could potentially be run on one of the buggy chips. */
2294 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2296 /* Validate that r3 is a call-clobbered register (always true in
2297 the default abi) ... */
2298 if (!call_used_regs[3])
2301 /* ... that it isn't being used for a return value ... */
2302 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2305 /* ... or for a tail-call argument ... */
2308 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2310 if (find_regno_fusage (sibling, USE, 3))
2314 /* ... and that there are no call-saved registers in r0-r2
2315 (always true in the default ABI). */
2316 if (saved_int_regs & 0x7)
2320 /* Can't be done if interworking with Thumb, and any registers have been stacked. */
2322 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2325 /* On StrongARM, conditional returns are expensive if they aren't
2326 taken and multiple registers have been stacked. */
2327 if (iscond && arm_tune_strongarm)
2329 /* Conditional return when just the LR is stored is a simple
2330 conditional-load instruction; that's not expensive. */
2331 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2334 if (flag_pic
2335 && arm_pic_register != INVALID_REGNUM
2336 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2340 /* If there are saved registers but the LR isn't saved, then we need
2341 two instructions for the return. */
2342 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2345 /* Can't be done if any of the FPA regs are pushed,
2346 since this also requires an insn. */
2347 if (TARGET_HARD_FLOAT && TARGET_FPA)
2348 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2349 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2352 /* Likewise VFP regs. */
2353 if (TARGET_HARD_FLOAT && TARGET_VFP)
2354 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2355 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2358 if (TARGET_REALLY_IWMMXT)
2359 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2360 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2366 /* Return TRUE if int I is a valid immediate ARM constant. */
2369 const_ok_for_arm (HOST_WIDE_INT i)
2373 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2374 be all zero, or all one. */
2375 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2376 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2377 != ((~(unsigned HOST_WIDE_INT) 0)
2378 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2381 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2383 /* Fast return for 0 and small values. We must do this for zero, since
2384 the code below can't handle that one case. */
2385 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2388 /* Get the number of trailing zeros. */
2389 lowbit = ffs((int) i) - 1;
2391 /* Only even shifts are allowed in ARM mode so round down to the
2392 nearest even number. */
2396 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2401 /* Allow rotated constants in ARM mode. */
2403 && ((i & ~0xc000003f) == 0
2404 || (i & ~0xf000000f) == 0
2405 || (i & ~0xfc000003) == 0))
2412 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2415 if (i == v || i == (v | (v << 8)))
2418 /* Allow repeated pattern 0xXY00XY00. */
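/* A standalone restatement of the core rule (sketch, not from this
   file): an ARM-mode data-processing immediate is an 8-bit value
   rotated right by an even amount. Testing all sixteen rotations is
   the brute-force equivalent of the shortcuts above. */
#include <stdint.h>
static int
arm_immediate_p (uint32_t v)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotate V left by ROT to undo a rotate-right of the 8-bit field. */
      uint32_t r = rot ? ((v << rot) | (v >> (32 - rot))) : v;
      if ((r & ~0xffu) == 0)
        return 1;
    }
  return 0;
}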
2428 /* Return true if I is a valid constant for the operation CODE. */
2430 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2432 if (const_ok_for_arm (i))
2456 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2458 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2464 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2468 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2475 /* Emit a sequence of insns to handle a large constant.
2476 CODE is the code of the operation required; it can be any of SET, PLUS,
2477 IOR, AND, XOR, MINUS;
2478 MODE is the mode in which the operation is being performed;
2479 VAL is the integer to operate on;
2480 SOURCE is the other operand (a register, or a null-pointer for SET);
2481 SUBTARGETS means it is safe to create scratch registers if that will
2482 either produce a simpler sequence, or we will want to cse the values.
2483 Return value is the number of insns emitted. */
2485 /* ??? Tweak this for thumb2. */
2487 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2488 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2492 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2493 cond = COND_EXEC_TEST (PATTERN (insn));
2494 else
2495 cond = NULL_RTX;
2497 if (subtargets || code == SET
2498 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2499 && REGNO (target) != REGNO (source)))
2501 /* After arm_reorg has been called, we can't fix up expensive
2502 constants by pushing them into memory so we must synthesize
2503 them in-line, regardless of the cost. This is only likely to
2504 be more costly on chips that have load delay slots and we are
2505 compiling without running the scheduler (so no splitting
2506 occurred before the final instruction emission).
2508 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2510 if (!after_arm_reorg
2512 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2514 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2519 /* Currently SET is the only monadic value for CODE, all
2520 the rest are dyadic. */
2521 if (TARGET_USE_MOVT)
2522 arm_emit_movpair (target, GEN_INT (val));
2523 else
2524 emit_set_insn (target, GEN_INT (val));
2530 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2532 if (TARGET_USE_MOVT)
2533 arm_emit_movpair (temp, GEN_INT (val));
2534 else
2535 emit_set_insn (temp, GEN_INT (val));
2537 /* For MINUS, SOURCE is subtracted from the value, since we never
2538 have subtraction of a constant. */
2539 if (code == MINUS)
2540 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2541 else
2542 emit_set_insn (target,
2543 gen_rtx_fmt_ee (code, mode, source, temp));
2549 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2553 /* Return the number of instructions required to synthesize the given
2554 constant, if we start emitting them from bit-position I. */
2556 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2558 HOST_WIDE_INT temp1;
2559 int step_size = TARGET_ARM ? 2 : 1;
2562 gcc_assert (TARGET_ARM || i == 0);
2570 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2575 temp1 = remainder & ((0x0ff << end)
2576 | ((i < end) ? (0xff >> (32 - end)) : 0));
2577 remainder &= ~temp1;
2582 } while (remainder);
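/* A simplified standalone version of the count above (sketch, mine):
   greedily clear one even-aligned 8-bit window per instruction. It
   ignores the rotate wrap-around between bits 31 and 0 that the real
   code handles, so it can overestimate slightly. */
#include <stdint.h>
static int
insns_for_constant (uint32_t v)
{
  int n = 0;
  while (v)
    {
      int low = __builtin_ctz (v) & ~1;  /* even bit position of lowest 1 */
      v &= ~(0xffu << low);              /* one MOV/ORR covers these bits */
      n++;
    }
  return n;
}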
2587 find_best_start (unsigned HOST_WIDE_INT remainder)
2589 int best_consecutive_zeros = 0;
2593 /* If we aren't targeting ARM, the best place to start is always at the bottom. */
2596 if (!TARGET_ARM)
2597 return 0;
2598 for (i = 0; i < 32; i += 2)
2600 int consecutive_zeros = 0;
2602 if (!(remainder & (3 << i)))
2604 while ((i < 32) && !(remainder & (3 << i)))
2606 consecutive_zeros += 2;
2609 if (consecutive_zeros > best_consecutive_zeros)
2611 best_consecutive_zeros = consecutive_zeros;
2612 best_start = i - consecutive_zeros;
2618 /* So long as it won't require any more insns to do so, it's
2619 desirable to emit a small constant (in bits 0...9) in the last
2620 insn. This way there is more chance that it can be combined with
2621 a later addressing insn to form a pre-indexed load or store
2622 operation. Consider:
2624 *((volatile int *)0xe0000100) = 1;
2625 *((volatile int *)0xe0000110) = 2;
2627 We want this to wind up as:
2629 mov rA, #0xe0000000
2630 mov rB, #1
2631 str rB, [rA, #0x100]
2632 mov rB, #2
2633 str rB, [rA, #0x110]
2635 rather than having to synthesize both large constants from scratch.
2637 Therefore, we calculate how many insns would be required to emit
2638 the constant starting from `best_start', and also starting from
2639 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2640 yield a shorter sequence, we may as well use zero. */
2641 if (best_start != 0
2642 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2643 && (count_insns_for_constant (remainder, 0) <=
2644 count_insns_for_constant (remainder, best_start)))
2650 /* Emit an instruction with the indicated PATTERN. If COND is
2651 non-NULL, conditionalize the execution of the instruction on COND
2655 emit_constant_insn (rtx cond, rtx pattern)
2657 if (cond)
2658 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2659 emit_insn (pattern);
2662 /* As above, but extra parameter GENERATE which, if clear, suppresses
2664 /* ??? This needs more work for thumb2. */
2667 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2668 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2673 int final_invert = 0;
2674 int can_negate_initial = 0;
2676 int num_bits_set = 0;
2677 int set_sign_bit_copies = 0;
2678 int clear_sign_bit_copies = 0;
2679 int clear_zero_bit_copies = 0;
2680 int set_zero_bit_copies = 0;
2682 unsigned HOST_WIDE_INT temp1, temp2;
2683 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2684 int step_size = TARGET_ARM ? 2 : 1;
2686 /* Find out which operations are safe for a given CODE. Also do a quick
2687 check for degenerate cases; these can occur when DImode operations are split. */
2698 can_negate_initial = 1;
2702 if (remainder == 0xffffffff)
2705 emit_constant_insn (cond,
2706 gen_rtx_SET (VOIDmode, target,
2707 GEN_INT (ARM_SIGN_EXTEND (val))));
2713 if (reload_completed && rtx_equal_p (target, source))
2717 emit_constant_insn (cond,
2718 gen_rtx_SET (VOIDmode, target, source));
2730 emit_constant_insn (cond,
2731 gen_rtx_SET (VOIDmode, target, const0_rtx));
2734 if (remainder == 0xffffffff)
2736 if (reload_completed && rtx_equal_p (target, source))
2739 emit_constant_insn (cond,
2740 gen_rtx_SET (VOIDmode, target, source));
2749 if (reload_completed && rtx_equal_p (target, source))
2752 emit_constant_insn (cond,
2753 gen_rtx_SET (VOIDmode, target, source));
2757 if (remainder == 0xffffffff)
2760 emit_constant_insn (cond,
2761 gen_rtx_SET (VOIDmode, target,
2762 gen_rtx_NOT (mode, source)));
2768 /* We treat MINUS as (val - source), since (source - val) is always
2769 passed as (source + (-val)). */
2773 emit_constant_insn (cond,
2774 gen_rtx_SET (VOIDmode, target,
2775 gen_rtx_NEG (mode, source)));
2778 if (const_ok_for_arm (val))
2781 emit_constant_insn (cond,
2782 gen_rtx_SET (VOIDmode, target,
2783 gen_rtx_MINUS (mode, GEN_INT (val),
2795 /* If we can do it in one insn get out quickly. */
2796 if (const_ok_for_arm (val)
2797 || (can_negate_initial && const_ok_for_arm (-val))
2798 || (can_invert && const_ok_for_arm (~val)))
2801 emit_constant_insn (cond,
2802 gen_rtx_SET (VOIDmode, target,
2804 ? gen_rtx_fmt_ee (code, mode, source,
2810 /* Calculate a few attributes that may be useful for specific optimizations. */
2812 /* Count number of leading zeros. */
2813 for (i = 31; i >= 0; i--)
2815 if ((remainder & (1 << i)) == 0)
2816 clear_sign_bit_copies++;
2821 /* Count number of leading 1's. */
2822 for (i = 31; i >= 0; i--)
2824 if ((remainder & (1 << i)) != 0)
2825 set_sign_bit_copies++;
2831 /* Count number of trailing zeros. */
2831 for (i = 0; i <= 31; i++)
2833 if ((remainder & (1 << i)) == 0)
2834 clear_zero_bit_copies++;
2839 /* Count number of trailing 1's. */
2840 for (i = 0; i <= 31; i++)
2842 if ((remainder & (1 << i)) != 0)
2843 set_zero_bit_copies++;
2851 /* See if we can use movw. */
2852 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2855 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2860 /* See if we can do this by sign_extending a constant that is known
2861 to be negative. This is a good way of doing it, since the shift
2862 may well merge into a subsequent insn. */
2863 if (set_sign_bit_copies > 1)
2865 if (const_ok_for_arm
2866 (temp1 = ARM_SIGN_EXTEND (remainder
2867 << (set_sign_bit_copies - 1))))
2871 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2872 emit_constant_insn (cond,
2873 gen_rtx_SET (VOIDmode, new_src,
2875 emit_constant_insn (cond,
2876 gen_ashrsi3 (target, new_src,
2877 GEN_INT (set_sign_bit_copies - 1)));
2881 /* For an inverted constant, we will need to set the low bits;
2882 these will be shifted out of harm's way. */
2883 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2884 if (const_ok_for_arm (~temp1))
2888 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2889 emit_constant_insn (cond,
2890 gen_rtx_SET (VOIDmode, new_src,
2892 emit_constant_insn (cond,
2893 gen_ashrsi3 (target, new_src,
2894 GEN_INT (set_sign_bit_copies - 1)));
2900 /* See if we can calculate the value as the difference between two
2901 valid immediates. */
2902 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2904 int topshift = clear_sign_bit_copies & ~1;
2906 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2907 & (0xff000000 >> topshift));
2909 /* If temp1 is zero, then that means the 9 most significant
2910 bits of remainder were 1 and we've caused it to overflow.
2911 When topshift is 0 we don't need to do anything since we
2912 can borrow from 'bit 32'. */
2913 if (temp1 == 0 && topshift != 0)
2914 temp1 = 0x80000000 >> (topshift - 1);
2916 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2918 if (const_ok_for_arm (temp2))
2922 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2923 emit_constant_insn (cond,
2924 gen_rtx_SET (VOIDmode, new_src,
2926 emit_constant_insn (cond,
2927 gen_addsi3 (target, new_src,
2935 /* See if we can generate this by setting the bottom (or the top)
2936 16 bits, and then shifting these into the other half of the
2937 word. We only look for the simplest cases; to do more would cost
2938 too much. Be careful, however, not to generate this when the
2939 alternative would take fewer insns. */
2940 if (val & 0xffff0000)
2942 temp1 = remainder & 0xffff0000;
2943 temp2 = remainder & 0x0000ffff;
2945 /* Overlaps outside this range are best done using other methods. */
2946 for (i = 9; i < 24; i++)
2948 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2949 && !const_ok_for_arm (temp2))
2951 rtx new_src = (subtargets
2952 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2954 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2955 source, subtargets, generate);
2963 gen_rtx_ASHIFT (mode, source,
2970 /* Don't duplicate cases already considered. */
2971 for (i = 17; i < 24; i++)
2973 if (((temp1 | (temp1 >> i)) == remainder)
2974 && !const_ok_for_arm (temp1))
2976 rtx new_src = (subtargets
2977 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2979 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2980 source, subtargets, generate);
2985 gen_rtx_SET (VOIDmode, target,
2988 gen_rtx_LSHIFTRT (mode, source,
2999 /* If we have IOR or XOR, and the constant can be loaded in a
3000 single instruction, and we can find a temporary to put it in,
3001 then this can be done in two instructions instead of 3-4. */
3003 /* TARGET can't be NULL if SUBTARGETS is 0 */
3004 || (reload_completed && !reg_mentioned_p (target, source)))
3006 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3010 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3012 emit_constant_insn (cond,
3013 gen_rtx_SET (VOIDmode, sub,
3015 emit_constant_insn (cond,
3016 gen_rtx_SET (VOIDmode, target,
3017 gen_rtx_fmt_ee (code, mode,
3028 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3029 and the remainder 0s for e.g. 0xfff00000)
3030 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3032 This can be done in 2 instructions by using shifts with mov or mvn.
3033 e.g. for
3034 x = x | 0xfff00000;
3035 we generate:
3036 mvn r0, r0, asl #12
3037 mvn r0, r0, lsr #12 */
3038 if (set_sign_bit_copies > 8
3039 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3043 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3044 rtx shift = GEN_INT (set_sign_bit_copies);
3048 gen_rtx_SET (VOIDmode, sub,
3050 gen_rtx_ASHIFT (mode,
3055 gen_rtx_SET (VOIDmode, target,
3057 gen_rtx_LSHIFTRT (mode, sub,
3064 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3066 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3068 For example, r0 = r0 | 0xfff:
3069 mvn r0, r0, lsr #12
3070 mvn r0, r0, asl #12
3072 */
3073 if (set_zero_bit_copies > 8
3074 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3078 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3079 rtx shift = GEN_INT (set_zero_bit_copies);
3083 gen_rtx_SET (VOIDmode, sub,
3085 gen_rtx_LSHIFTRT (mode,
3090 gen_rtx_SET (VOIDmode, target,
3092 gen_rtx_ASHIFT (mode, sub,
3098 /* This will never be reached for Thumb2 because orn is a valid
3099 instruction. This is for Thumb1 and the ARM 32-bit cases.
3101 x = y | constant (such that ~constant is a valid constant)
3103 x = ~(~y & ~constant).
3105 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3109 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3110 emit_constant_insn (cond,
3111 gen_rtx_SET (VOIDmode, sub,
3112 gen_rtx_NOT (mode, source)));
3115 sub = gen_reg_rtx (mode);
3116 emit_constant_insn (cond,
3117 gen_rtx_SET (VOIDmode, sub,
3118 gen_rtx_AND (mode, source,
3120 emit_constant_insn (cond,
3121 gen_rtx_SET (VOIDmode, target,
3122 gen_rtx_NOT (mode, sub)));
3129 /* See if two shifts will do two or more insns' worth of work. */
3130 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3132 HOST_WIDE_INT shift_mask = ((0xffffffff
3133 << (32 - clear_sign_bit_copies))
3136 if ((remainder | shift_mask) != 0xffffffff)
3140 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3141 insns = arm_gen_constant (AND, mode, cond,
3142 remainder | shift_mask,
3143 new_src, source, subtargets, 1);
3148 rtx targ = subtargets ? NULL_RTX : target;
3149 insns = arm_gen_constant (AND, mode, cond,
3150 remainder | shift_mask,
3151 targ, source, subtargets, 0);
3157 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3158 rtx shift = GEN_INT (clear_sign_bit_copies);
3160 emit_insn (gen_ashlsi3 (new_src, source, shift));
3161 emit_insn (gen_lshrsi3 (target, new_src, shift));
3167 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3169 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3171 if ((remainder | shift_mask) != 0xffffffff)
3175 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3177 insns = arm_gen_constant (AND, mode, cond,
3178 remainder | shift_mask,
3179 new_src, source, subtargets, 1);
3184 rtx targ = subtargets ? NULL_RTX : target;
3186 insns = arm_gen_constant (AND, mode, cond,
3187 remainder | shift_mask,
3188 targ, source, subtargets, 0);
3194 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3195 rtx shift = GEN_INT (clear_zero_bit_copies);
3197 emit_insn (gen_lshrsi3 (new_src, source, shift));
3198 emit_insn (gen_ashlsi3 (target, new_src, shift));
3210 for (i = 0; i < 32; i++)
3211 if (remainder & (1 << i))
3215 || (code != IOR && can_invert && num_bits_set > 16))
3216 remainder ^= 0xffffffff;
3217 else if (code == PLUS && num_bits_set > 16)
3218 remainder = (-remainder) & 0xffffffff;
3220 /* For XOR, if more than half the bits are set and there's a sequence
3221 of more than 8 consecutive ones in the pattern then we can XOR by the
3222 inverted constant and then invert the final result; this may save an
3223 instruction and might also lead to the final mvn being merged with
3224 some other operation. */
3225 else if (code == XOR && num_bits_set > 16
3226 && (count_insns_for_constant (remainder ^ 0xffffffff,
3228 (remainder ^ 0xffffffff))
3229 < count_insns_for_constant (remainder,
3230 find_best_start (remainder))))
3232 remainder ^= 0xffffffff;
3241 /* Now try to find a way of doing the job in either two or three instructions.
3243 We start by looking for the largest block of zeros that are aligned on
3244 a 2-bit boundary, we then fill up the temps, wrapping around to the
3245 top of the word when we drop off the bottom.
3246 In the worst case this code should produce no more than four insns.
3247 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3248 best place to start. */
3250 /* ??? Use thumb2 replicated constants when the high and low halfwords are the same. */
3253 /* Now start emitting the insns. */
3254 i = find_best_start (remainder);
3261 if (remainder & (3 << (i - 2)))
3266 temp1 = remainder & ((0x0ff << end)
3267 | ((i < end) ? (0xff >> (32 - end)) : 0));
3268 remainder &= ~temp1;
3272 rtx new_src, temp1_rtx;
3274 if (code == SET || code == MINUS)
3276 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3277 if (can_invert && code != MINUS)
3282 if ((final_invert || remainder) && subtargets)
3283 new_src = gen_reg_rtx (mode);
3288 else if (can_negate)
3292 temp1 = trunc_int_for_mode (temp1, mode);
3293 temp1_rtx = GEN_INT (temp1);
3297 else if (code == MINUS)
3298 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3300 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3302 emit_constant_insn (cond,
3303 gen_rtx_SET (VOIDmode, new_src,
3313 else if (code == MINUS)
3319 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3329 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3330 gen_rtx_NOT (mode, source)));
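/* Worked example of the chunked synthesis above (illustration, mine):
   0x12345678 splits into four even-aligned 8-bit windows, each a valid
   rotated immediate, giving the ARM-mode worst case of four insns:

       mov r0, #0x12000000
       orr r0, r0, #0x00340000
       orr r0, r0, #0x00005600
       orr r0, r0, #0x00000078  */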
3337 /* Canonicalize a comparison so that we are more likely to recognize it.
3338 This can be done for a few constant compares, where we can make the
3339 immediate value easier to load. */
3342 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3344 enum machine_mode mode;
3345 unsigned HOST_WIDE_INT i, maxval;
3347 mode = GET_MODE (*op0);
3348 if (mode == VOIDmode)
3349 mode = GET_MODE (*op1);
3351 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3353 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3354 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3355 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3356 for GTU/LEU in Thumb mode. */
3361 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
3363 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3366 if (code == GT || code == LE
3367 || (!TARGET_ARM && (code == GTU || code == LEU)))
3369 /* Missing comparison. First try to use an available comparison. */
3371 if (GET_CODE (*op1) == CONST_INT)
3379 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3381 *op1 = GEN_INT (i + 1);
3382 return code == GT ? GE : LT;
3387 if (i != ~((unsigned HOST_WIDE_INT) 0)
3388 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3390 *op1 = GEN_INT (i + 1);
3391 return code == GTU ? GEU : LTU;
3399 /* If that did not work, reverse the condition. */
3403 return swap_condition (code);
3409 /* Comparisons smaller than DImode. Only adjust comparisons against
3410 an out-of-range constant. */
3411 if (GET_CODE (*op1) != CONST_INT
3412 || const_ok_for_arm (INTVAL (*op1))
3413 || const_ok_for_arm (- INTVAL (*op1)))
3427 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3429 *op1 = GEN_INT (i + 1);
3430 return code == GT ? GE : LT;
3437 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3439 *op1 = GEN_INT (i - 1);
3440 return code == GE ? GT : LE;
3446 if (i != ~((unsigned HOST_WIDE_INT) 0)
3447 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3449 *op1 = GEN_INT (i + 1);
3450 return code == GTU ? GEU : LTU;
3457 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3459 *op1 = GEN_INT (i - 1);
3460 return code == GEU ? GTU : LEU;
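/* The rewrites above rely on integer identities such as
   (x > c) == (x >= c + 1) for c below the type maximum; a standalone
   check (sketch, mine). Here 0x0fffffff has too many significant bits
   to be an immediate, while 0x10000000 is 0x10 rotated into place. */
#include <assert.h>
#include <stdint.h>
int main (void)
{
  const int32_t c = 0x0fffffff;            /* not encodable */
  for (int64_t x = (int64_t) c - 1; x <= (int64_t) c + 1; x++)
    assert ((x > c) == (x >= c + 1));      /* c + 1 is encodable */
  return 0;
}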
3472 /* Define how to find the value returned by a function. */
3475 arm_function_value(const_tree type, const_tree func,
3476 bool outgoing ATTRIBUTE_UNUSED)
3478 enum machine_mode mode;
3479 int unsignedp ATTRIBUTE_UNUSED;
3480 rtx r ATTRIBUTE_UNUSED;
3482 mode = TYPE_MODE (type);
3484 if (TARGET_AAPCS_BASED)
3485 return aapcs_allocate_return_reg (mode, type, func);
3487 /* Promote integer types. */
3488 if (INTEGRAL_TYPE_P (type))
3489 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3491 /* Promote small structs returned in a register to full-word size
3492 for big-endian AAPCS. */
3493 if (arm_return_in_msb (type))
3495 HOST_WIDE_INT size = int_size_in_bytes (type);
3496 if (size % UNITS_PER_WORD != 0)
3498 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3499 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3503 return LIBCALL_VALUE (mode);
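/* Standalone check of the rounding step above (sketch; assumes a
   4-byte word): a 6-byte struct is widened to 8 bytes so its contents
   sit at the most significant end of the return registers. */
#include <assert.h>
int main (void)
{
  int size = 6;                     /* int_size_in_bytes result */
  const int units_per_word = 4;     /* assumption for the sketch */
  if (size % units_per_word != 0)
    size += units_per_word - size % units_per_word;
  assert (size == 8);
  return 0;
}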
3507 libcall_eq (const void *p1, const void *p2)
3509 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3513 libcall_hash (const void *p1)
3515 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3519 add_libcall (htab_t htab, rtx libcall)
3521 *htab_find_slot (htab, libcall, INSERT) = libcall;
3525 arm_libcall_uses_aapcs_base (const_rtx libcall)
3527 static bool init_done = false;
3528 static htab_t libcall_htab;
3534 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3535 NULL);
3536 add_libcall (libcall_htab,
3537 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3538 add_libcall (libcall_htab,
3539 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3540 add_libcall (libcall_htab,
3541 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3542 add_libcall (libcall_htab,
3543 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3545 add_libcall (libcall_htab,
3546 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3547 add_libcall (libcall_htab,
3548 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3549 add_libcall (libcall_htab,
3550 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3551 add_libcall (libcall_htab,
3552 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3554 add_libcall (libcall_htab,
3555 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3556 add_libcall (libcall_htab,
3557 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3558 add_libcall (libcall_htab,
3559 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3560 add_libcall (libcall_htab,
3561 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3562 add_libcall (libcall_htab,
3563 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3564 add_libcall (libcall_htab,
3565 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3568 return libcall && htab_find (libcall_htab, libcall) != NULL;
3572 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3574 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3575 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3577 /* The following libcalls return their result in integer registers,
3578 even though they return a floating point value. */
3579 if (arm_libcall_uses_aapcs_base (libcall))
3580 return gen_rtx_REG (mode, ARG_REGISTER(1));
3584 return LIBCALL_VALUE (mode);
3587 /* Determine the amount of memory needed to store the possible return
3588 registers of an untyped call. */
3590 arm_apply_result_size (void)
3596 if (TARGET_HARD_FLOAT_ABI)
3602 if (TARGET_MAVERICK)
3605 if (TARGET_IWMMXT_ABI)
3612 /* Decide whether TYPE should be returned in memory (true)
3613 or in a register (false). FNTYPE is the type of the function making the call. */
3616 arm_return_in_memory (const_tree type, const_tree fntype)
3620 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3622 if (TARGET_AAPCS_BASED)
3624 /* Simple, non-aggregate types (i.e. not including vectors and
3625 complex) are always returned in a register (or registers).
3626 We don't care about which register here, so we can short-cut
3627 some of the detail. */
3628 if (!AGGREGATE_TYPE_P (type)
3629 && TREE_CODE (type) != VECTOR_TYPE
3630 && TREE_CODE (type) != COMPLEX_TYPE)
3633 /* Any return value that is no larger than one word can be returned in r0. */
3635 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3638 /* Check any available co-processors to see if they accept the
3639 type as a register candidate (VFP, for example, can return
3640 some aggregates in consecutive registers). These aren't
3641 available if the call is variadic. */
3642 if (aapcs_select_return_coproc (type, fntype) >= 0)
3645 /* Vector values should be returned using ARM registers, not
3646 memory (unless they're over 16 bytes, which will break since
3647 we only have four call-clobbered registers to play with). */
3648 if (TREE_CODE (type) == VECTOR_TYPE)
3649 return (size < 0 || size > (4 * UNITS_PER_WORD));
3651 /* The rest go in memory. */
3655 if (TREE_CODE (type) == VECTOR_TYPE)
3656 return (size < 0 || size > (4 * UNITS_PER_WORD));
3658 if (!AGGREGATE_TYPE_P (type)
3659 && (TREE_CODE (type) != VECTOR_TYPE))
3660 /* All simple types are returned in registers. */
3663 if (arm_abi != ARM_ABI_APCS)
3665 /* ATPCS and later return aggregate types in memory only if they are
3666 larger than a word (or are variable size). */
3667 return (size < 0 || size > UNITS_PER_WORD);
3670 /* For the arm-wince targets we choose to be compatible with Microsoft's
3671 ARM and Thumb compilers, which always return aggregates in memory. */
3673 /* All structures/unions bigger than one word are returned in memory.
3674 Also catch the case where int_size_in_bytes returns -1. In this case
3675 the aggregate is either huge or of variable size, and in either case
3676 we will want to return it via memory and not in a register. */
3677 if (size < 0 || size > UNITS_PER_WORD)
3680 if (TREE_CODE (type) == RECORD_TYPE)
3684 /* For a struct the APCS says that we only return in a register
3685 if the type is 'integer like' and every addressable element
3686 has an offset of zero. For practical purposes this means
3687 that the structure can have at most one non bit-field element
3688 and that this element must be the first one in the structure. */
3690 /* Find the first field, ignoring non-FIELD_DECL things which will
3691 have been created by C++. */
3692 for (field = TYPE_FIELDS (type);
3693 field && TREE_CODE (field) != FIELD_DECL;
3694 field = DECL_CHAIN (field))
3698 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3700 /* Check that the first field is valid for returning in a register. */
3702 /* ... Floats are not allowed. */
3703 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3706 /* ... Aggregates that are not themselves valid for returning in
3707 a register are not allowed. */
3708 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3711 /* Now check the remaining fields, if any. Only bitfields are allowed,
3712 since they are not addressable. */
3713 for (field = DECL_CHAIN (field);
3715 field = DECL_CHAIN (field))
3717 if (TREE_CODE (field) != FIELD_DECL)
3720 if (!DECL_BIT_FIELD_TYPE (field))
3727 if (TREE_CODE (type) == UNION_TYPE)
3731 /* Unions can be returned in registers if every element is
3732 integral, or can be returned in an integer register. */
3733 for (field = TYPE_FIELDS (type);
3735 field = DECL_CHAIN (field))
3737 if (TREE_CODE (field) != FIELD_DECL)
3740 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3743 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3749 #endif /* not ARM_WINCE */
3751 /* Return all other types in memory. */
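/* Illustrative cases for the APCS rules above (sketch, mine; assumes
   32-bit int and 4-byte words):

     struct a { int x; };             // one word, integer-like: register
     struct b { float f; };           // float member: memory
     struct c { int x, y; };          // wider than one word: memory
     struct d { int x : 8, y : 8; };  // bit-fields after the first
                                      // field are fine: register    */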
3755 /* Indicate whether or not words of a double are in big-endian order. */
3758 arm_float_words_big_endian (void)
3760 if (TARGET_MAVERICK)
3763 /* For FPA, float words are always big-endian. For VFP, float words
3764 follow the memory system mode. */
3772 return (TARGET_BIG_END ? 1 : 0);
3777 const struct pcs_attribute_arg
3778 {
3779 const char *arg;
3780 enum arm_pcs value;
3781 } pcs_attribute_args[] =
3783 {"aapcs", ARM_PCS_AAPCS},
3784 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3786 /* We could recognize these, but changes would be needed elsewhere
3787 * to implement them. */
3788 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3789 {"atpcs", ARM_PCS_ATPCS},
3790 {"apcs", ARM_PCS_APCS},
3792 {NULL, ARM_PCS_UNKNOWN}
3796 arm_pcs_from_attribute (tree attr)
3798 const struct pcs_attribute_arg *ptr;
3799 const char *arg;
3801 /* Get the value of the argument. */
3802 if (TREE_VALUE (attr) == NULL_TREE
3803 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3804 return ARM_PCS_UNKNOWN;
3806 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3808 /* Check it against the list of known arguments. */
3809 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3810 if (streq (arg, ptr->arg))
3811 return ptr->value;
3813 /* An unrecognized PCS name. */
3814 return ARM_PCS_UNKNOWN;
3817 /* Get the PCS variant to use for this call. TYPE is the function's type
3818 specification, DECL is the specific declaration. DECL may be null if
3819 the call could be indirect or if this is a library call. */
3821 arm_get_pcs_model (const_tree type, const_tree decl)
3823 bool user_convention = false;
3824 enum arm_pcs user_pcs = arm_pcs_default;
3829 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3832 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3833 user_convention = true;
3836 if (TARGET_AAPCS_BASED)
3838 /* Detect varargs functions. These always use the base rules
3839 (no argument is ever a candidate for a co-processor register). */
3841 bool base_rules = stdarg_p (type);
3843 if (user_convention)
3845 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3846 sorry ("non-AAPCS derived PCS variant");
3847 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3848 error ("variadic functions must use the base AAPCS variant");
3852 return ARM_PCS_AAPCS;
3853 else if (user_convention)
3855 else if (decl && flag_unit_at_a_time)
3857 /* Local functions never leak outside this compilation unit,
3858 so we are free to use whatever conventions are appropriate. */
3860 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3861 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3863 return ARM_PCS_AAPCS_LOCAL;
3866 else if (user_convention && user_pcs != arm_pcs_default)
3867 sorry ("PCS variant");
3869 /* For everything else we use the target's default. */
3870 return arm_pcs_default;
3875 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3876 const_tree fntype ATTRIBUTE_UNUSED,
3877 rtx libcall ATTRIBUTE_UNUSED,
3878 const_tree fndecl ATTRIBUTE_UNUSED)
3880 /* Record the unallocated VFP registers. */
3881 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3882 pcum->aapcs_vfp_reg_alloc = 0;
3885 /* Walk down the type tree of TYPE counting consecutive base elements.
3886 If *MODEP is VOIDmode, then set it to the first valid floating point
3887 type. If a non-floating point type is found, or if a floating point
3888 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3889 otherwise return the count in the sub-tree. */
3891 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3893 enum machine_mode mode;
3896 switch (TREE_CODE (type))
3899 mode = TYPE_MODE (type);
3900 if (mode != DFmode && mode != SFmode)
3903 if (*modep == VOIDmode)
3912 mode = TYPE_MODE (TREE_TYPE (type));
3913 if (mode != DFmode && mode != SFmode)
3916 if (*modep == VOIDmode)
3925 /* Use V2SImode and V4SImode as representatives of all 64-bit
3926 and 128-bit vector types, whether or not those modes are
3927 supported with the present options. */
3928 size = int_size_in_bytes (type);
3941 if (*modep == VOIDmode)
3944 /* Vector modes are considered to be opaque: two vectors are
3945 equivalent for the purposes of being homogeneous aggregates
3946 if they are the same size. */
3955 tree index = TYPE_DOMAIN (type);
3957 /* Can't handle incomplete types. */
3958 if (!COMPLETE_TYPE_P(type))
3961 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3964 || !TYPE_MAX_VALUE (index)
3965 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3966 || !TYPE_MIN_VALUE (index)
3967 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3971 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3972 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3974 /* There must be no padding. */
3975 if (!host_integerp (TYPE_SIZE (type), 1)
3976 || (tree_low_cst (TYPE_SIZE (type), 1)
3977 != count * GET_MODE_BITSIZE (*modep)))
3989 /* Can't handle incomplete types. */
3990 if (!COMPLETE_TYPE_P(type))
3993 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3995 if (TREE_CODE (field) != FIELD_DECL)
3998 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4004 /* There must be no padding. */
4005 if (!host_integerp (TYPE_SIZE (type), 1)
4006 || (tree_low_cst (TYPE_SIZE (type), 1)
4007 != count * GET_MODE_BITSIZE (*modep)))
4014 case QUAL_UNION_TYPE:
4016 /* These aren't very interesting except in a degenerate case. */
4021 /* Can't handle incomplete types. */
4022 if (!COMPLETE_TYPE_P(type))
4025 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4027 if (TREE_CODE (field) != FIELD_DECL)
4030 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4033 count = count > sub_count ? count : sub_count;
4036 /* There must be no padding. */
4037 if (!host_integerp (TYPE_SIZE (type), 1)
4038 || (tree_low_cst (TYPE_SIZE (type), 1)
4039 != count * GET_MODE_BITSIZE (*modep)))
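/* Examples for the walk above (illustration, mine): a homogeneous
   floating-point aggregate is one to four elements of a single
   floating type, with no padding.

     struct hfa1 { float s; };              // count 1, SFmode
     struct hfa3 { double d[3]; };          // count 3, DFmode
     struct not1 { float s; double d; };    // mixed modes: -1
     struct not2 { float s; int i; };       // non-FP member: -1     */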
4052 /* Return true if PCS_VARIANT should use VFP registers. */
4054 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4056 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4058 static bool seen_thumb1_vfp = false;
4060 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4062 sorry ("Thumb-1 hard-float VFP ABI");
4063 /* sorry() is not immediately fatal, so only display this once. */
4064 seen_thumb1_vfp = true;
4070 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4073 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4074 && (TARGET_VFP_DOUBLE || !is_double));
4078 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4079 enum machine_mode mode, const_tree type,
4080 enum machine_mode *base_mode, int *count)
4082 enum machine_mode new_mode = VOIDmode;
4084 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4085 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4086 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4091 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4094 new_mode = (mode == DCmode ? DFmode : SFmode);
4096 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4098 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4100 if (ag_count > 0 && ag_count <= 4)
4109 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4112 *base_mode = new_mode;
4117 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4118 enum machine_mode mode, const_tree type)
4120 int count ATTRIBUTE_UNUSED;
4121 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4123 if (!use_vfp_abi (pcs_variant, false))
4125 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4130 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4133 if (!use_vfp_abi (pcum->pcs_variant, false))
4136 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4137 &pcum->aapcs_vfp_rmode,
4138 &pcum->aapcs_vfp_rcount);
4142 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4143 const_tree type ATTRIBUTE_UNUSED)
4145 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4146 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4149 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4150 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4152 pcum->aapcs_vfp_reg_alloc = mask << regno;
4153 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4156 int rcount = pcum->aapcs_vfp_rcount;
4158 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4162 /* Avoid using unsupported vector modes. */
4163 if (rmode == V2SImode)
4165 else if (rmode == V4SImode)
4172 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4173 for (i = 0; i < rcount; i++)
4175 rtx tmp = gen_rtx_REG (rmode,
4176 FIRST_VFP_REGNUM + regno + i * rshift);
4177 tmp = gen_rtx_EXPR_LIST
4179 GEN_INT (i * GET_MODE_SIZE (rmode)));
4180 XVECEXP (par, 0, i) = tmp;
4183 pcum->aapcs_reg = par;
4186 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
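/* Standalone sketch of the scan above (mine): find the lowest aligned
   run of free S registers for an argument of COUNT elements, each
   SHIFT S-registers wide, in a 16-register file (s0-s15); COUNT is at
   most 4, so the mask always fits. */
#include <stdint.h>
static int
alloc_vfp_block (uint32_t *free_mask, int shift, int count)
{
  uint32_t mask = (1u << (shift * count)) - 1;   /* shift*count <= 16 */
  for (int regno = 0; regno < 16; regno += shift)
    if (((*free_mask >> regno) & mask) == mask)
      {
        *free_mask &= ~(mask << regno);          /* mark block in use */
        return regno;                            /* first S register used */
      }
  return -1;                                     /* no block: argument goes on the stack */
}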
4193 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4194 enum machine_mode mode,
4195 const_tree type ATTRIBUTE_UNUSED)
4197 if (!use_vfp_abi (pcs_variant, false))
4200 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4203 enum machine_mode ag_mode;
4208 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4213 if (ag_mode == V2SImode)
4215 else if (ag_mode == V4SImode)
4221 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4222 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4223 for (i = 0; i < count; i++)
4225 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4226 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4227 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4228 XVECEXP (par, 0, i) = tmp;
4234 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4238 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4239 enum machine_mode mode ATTRIBUTE_UNUSED,
4240 const_tree type ATTRIBUTE_UNUSED)
4242 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4243 pcum->aapcs_vfp_reg_alloc = 0;
4247 #define AAPCS_CP(X) \
4249 aapcs_ ## X ## _cum_init, \
4250 aapcs_ ## X ## _is_call_candidate, \
4251 aapcs_ ## X ## _allocate, \
4252 aapcs_ ## X ## _is_return_candidate, \
4253 aapcs_ ## X ## _allocate_return_reg, \
4254 aapcs_ ## X ## _advance \
4257 /* Table of co-processors that can be used to pass arguments in
4258 registers. Ideally no argument should be a candidate for more than
4259 one co-processor table entry, but the table is processed in order
4260 and stops after the first match. If that entry then fails to put
4261 the argument into a co-processor register, the argument will go on the stack. */
4265 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4266 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4268 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4269 BLKmode) is a candidate for this co-processor's registers; this
4270 function should ignore any position-dependent state in
4271 CUMULATIVE_ARGS and only use call-type dependent information. */
4272 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4274 /* Return true if the argument does get a co-processor register; it
4275 should set aapcs_reg to an RTX of the register allocated as is
4276 required for a return from FUNCTION_ARG. */
4277 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4279 /* Return true if a result of mode MODE (or type TYPE if MODE is
4280 BLKmode) can be returned in this co-processor's registers. */
4281 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4283 /* Allocate and return an RTX element to hold the return type of a
4284 call; this routine must not fail and will only be called if
4285 is_return_candidate returned true with the same parameters. */
4286 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4288 /* Finish processing this argument and prepare to start processing the next one. */
4290 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4291 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4299 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4304 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4305 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4312 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4314 /* We aren't passed a decl, so we can't check that a call is local.
4315 However, it isn't clear that that would be a win anyway, since it
4316 might limit some tail-calling opportunities. */
4317 enum arm_pcs pcs_variant;
4321 const_tree fndecl = NULL_TREE;
4323 if (TREE_CODE (fntype) == FUNCTION_DECL)
4326 fntype = TREE_TYPE (fntype);
4329 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4332 pcs_variant = arm_pcs_default;
4334 if (pcs_variant != ARM_PCS_AAPCS)
4338 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4339 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4348 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4351 /* We aren't passed a decl, so we can't check that a call is local.
4352 However, it isn't clear that that would be a win anyway, since it
4353 might limit some tail-calling opportunities. */
4354 enum arm_pcs pcs_variant;
4355 int unsignedp ATTRIBUTE_UNUSED;
4359 const_tree fndecl = NULL_TREE;
4361 if (TREE_CODE (fntype) == FUNCTION_DECL)
4364 fntype = TREE_TYPE (fntype);
4367 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4370 pcs_variant = arm_pcs_default;
4372 /* Promote integer types. */
4373 if (type && INTEGRAL_TYPE_P (type))
4374 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4376 if (pcs_variant != ARM_PCS_AAPCS)
4380 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4381 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4383 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4387 /* Promotes small structs returned in a register to full-word size
4388 for big-endian AAPCS. */
4389 if (type && arm_return_in_msb (type))
4391 HOST_WIDE_INT size = int_size_in_bytes (type);
4392 if (size % UNITS_PER_WORD != 0)
4394 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4395 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4399 return gen_rtx_REG (mode, R0_REGNUM);
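/* E.g. on a big-endian AAPCS target a 3-byte struct result is rounded
   up to SImode by the code above and handed back in r0 with the value
   in the most significant bytes, which is the layout
   arm_return_in_msb tests for. */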
4403 aapcs_libcall_value (enum machine_mode mode)
4405 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4408 /* Lay out a function argument using the AAPCS rules. The rule
4409 numbers referred to here are those in the AAPCS. */
4411 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4412 const_tree type, bool named)
4417 /* We only need to do this once per argument. */
4418 if (pcum->aapcs_arg_processed)
4421 pcum->aapcs_arg_processed = true;
4423 /* Special case: if named is false then we are handling an incoming
4424 anonymous argument which is on the stack. */
4428 /* Is this a potential co-processor register candidate? */
4429 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4431 int slot = aapcs_select_call_coproc (pcum, mode, type);
4432 pcum->aapcs_cprc_slot = slot;
4434 /* We don't have to apply any of the rules from part B of the
4435 preparation phase; these are handled elsewhere in the compiler. */
4440 /* A Co-processor register candidate goes either in its own
4441 class of registers or on the stack. */
4442 if (!pcum->aapcs_cprc_failed[slot])
4444 /* C1.cp - Try to allocate the argument to co-processor registers. */
4446 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4449 /* C2.cp - Put the argument on the stack and note that we
4450 can't assign any more candidates in this slot. We also
4451 need to note that we have allocated stack space, so that
4452 we won't later try to split a non-cprc candidate between
4453 core registers and the stack. */
4454 pcum->aapcs_cprc_failed[slot] = true;
4455 pcum->can_split = false;
4458 /* We didn't get a register, so this argument goes on the stack. */
4460 gcc_assert (pcum->can_split == false);
4465 /* C3 - For double-word aligned arguments, round the NCRN up to the
4466 next even number. */
4467 ncrn = pcum->aapcs_ncrn;
4468 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4471 nregs = ARM_NUM_REGS2 (mode, type);
4473 /* Sigh, this test should really assert that nregs > 0, but a GCC
4474 extension allows empty structs and then gives them empty size; it
4475 then allows such a structure to be passed by value. For some of
4476 the code below we have to pretend that such an argument has
4477 non-zero size so that we 'locate' it correctly either in
4478 registers or on the stack. */
4479 gcc_assert (nregs >= 0);
4481 nregs2 = nregs ? nregs : 1;
4483 /* C4 - Argument fits entirely in core registers. */
4484 if (ncrn + nregs2 <= NUM_ARG_REGS)
4486 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4487 pcum->aapcs_next_ncrn = ncrn + nregs;
4491 /* C5 - Some core registers left and there are no arguments already
4492 on the stack: split this argument between the remaining core
4493 registers and the stack. */
4494 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4496 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4497 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4498 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4502 /* C6 - NCRN is set to 4. */
4503 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4505 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
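/* Worked example of the rules above (hypothetical signature):

     void f (int a, double b, int c);

   under the base AAPCS, a lands in r0; b needs doubleword alignment,
   so C3 rounds the NCRN up from 1 to 2 and C4 assigns r2-r3; c then
   no longer fits, so C6 pins the NCRN at 4 and C7/C8 send it to the
   stack. */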
4509 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4510 for a call to a function whose data type is FNTYPE.
4511 For a library call, FNTYPE is NULL. */
4513 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4515 tree fndecl ATTRIBUTE_UNUSED)
4517 /* Long call handling. */
4519 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4521 pcum->pcs_variant = arm_pcs_default;
4523 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4525 if (arm_libcall_uses_aapcs_base (libname))
4526 pcum->pcs_variant = ARM_PCS_AAPCS;
4528 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4529 pcum->aapcs_reg = NULL_RTX;
4530 pcum->aapcs_partial = 0;
4531 pcum->aapcs_arg_processed = false;
4532 pcum->aapcs_cprc_slot = -1;
4533 pcum->can_split = true;
4535 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4539 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4541 pcum->aapcs_cprc_failed[i] = false;
4542 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4550 /* On the ARM, the offset starts at 0. */
4552 pcum->iwmmxt_nregs = 0;
4553 pcum->can_split = true;
4555 /* Varargs vectors are treated the same as long long.
4556 named_count avoids having to change the way arm handles 'named'. */
4557 pcum->named_count = 0;
4560 if (TARGET_REALLY_IWMMXT && fntype)
4564 for (fn_arg = TYPE_ARG_TYPES (fntype);
4566 fn_arg = TREE_CHAIN (fn_arg))
4567 pcum->named_count += 1;
4569 if (! pcum->named_count)
4570 pcum->named_count = INT_MAX;
4575 /* Return true if mode/type need doubleword alignment. */
4577 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4579 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4580 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
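/* With PARM_BOUNDARY being 32 on ARM, this answers true for DImode
   and DFmode arguments and for any type carrying, say,
   __attribute__ ((aligned (8))), and false for plain SImode ones. */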
4584 /* Determine where to put an argument to a function.
4585 Value is zero to push the argument on the stack,
4586 or a hard register in which to store the argument.
4588 MODE is the argument's machine mode.
4589 TYPE is the data type of the argument (as a tree).
4590 This is null for libcalls where that information may not be available.
4592 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4593 the preceding args and about the function being called.
4594 NAMED is nonzero if this argument is a named parameter
4595 (otherwise it is an extra parameter matching an ellipsis).
4597 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4598 other arguments are passed on the stack. If (NAMED == 0) (which happens
4599 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4600 defined), say it is passed on the stack (function_prologue will
4601 indeed make it pass on the stack if necessary). */
4604 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4605 const_tree type, bool named)
4609 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4610 a call insn (op3 of a call_value insn). */
4611 if (mode == VOIDmode)
4614 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4616 aapcs_layout_arg (pcum, mode, type, named);
4617 return pcum->aapcs_reg;
4620 /* Varargs vectors are treated the same as long long.
4621 named_count avoids having to change the way arm handles 'named'. */
4622 if (TARGET_IWMMXT_ABI
4623 && arm_vector_mode_supported_p (mode)
4624 && pcum->named_count > pcum->nargs + 1)
4626 if (pcum->iwmmxt_nregs <= 9)
4627 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4630 pcum->can_split = false;
4635 /* Put doubleword aligned quantities in even register pairs. */
4637 && ARM_DOUBLEWORD_ALIGN
4638 && arm_needs_doubleword_align (mode, type))
4641 /* Only allow splitting an arg between regs and memory if all preceding
4642 args were allocated to regs. For args passed by reference we only count
4643 the reference pointer. */
4644 if (pcum->can_split)
4647 nregs = ARM_NUM_REGS2 (mode, type);
4649 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4652 return gen_rtx_REG (mode, pcum->nregs);
4656 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4658 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4659 ? DOUBLEWORD_ALIGNMENT
4664 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4665 tree type, bool named)
4667 int nregs = pcum->nregs;
4669 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4671 aapcs_layout_arg (pcum, mode, type, named);
4672 return pcum->aapcs_partial;
4675 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4678 if (NUM_ARG_REGS > nregs
4679 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4681 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
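/* For instance, if two core registers are already in use, a 16-byte
   struct gets split by the code above: 8 bytes in r2-r3 and 8 bytes
   on the stack, reported as 8 partial bytes (illustrative values,
   pre-AAPCS path). */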
4686 /* Update the data in PCUM to advance over an argument
4687 of mode MODE and data type TYPE.
4688 (TYPE is null for libcalls where that information may not be available.) */
4691 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4692 const_tree type, bool named)
4694 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4696 aapcs_layout_arg (pcum, mode, type, named);
4698 if (pcum->aapcs_cprc_slot >= 0)
4700 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4702 pcum->aapcs_cprc_slot = -1;
4705 /* Generic stuff. */
4706 pcum->aapcs_arg_processed = false;
4707 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4708 pcum->aapcs_reg = NULL_RTX;
4709 pcum->aapcs_partial = 0;
4714 if (arm_vector_mode_supported_p (mode)
4715 && pcum->named_count > pcum->nargs
4716 && TARGET_IWMMXT_ABI)
4717 pcum->iwmmxt_nregs += 1;
4719 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4723 /* Variable sized types are passed by reference. This is a GCC
4724 extension to the ARM ABI. */
4727 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4728 enum machine_mode mode ATTRIBUTE_UNUSED,
4729 const_tree type, bool named ATTRIBUTE_UNUSED)
4731 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
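/* That is, any argument whose type has a non-constant size (for
   example a GNU C variable-length array type) is passed as a pointer
   to a temporary copy rather than copied into registers. */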
4734 /* Encode the current state of the #pragma [no_]long_calls. */
4737 OFF, /* No #pragma [no_]long_calls is in effect. */
4738 LONG, /* #pragma long_calls is in effect. */
4739 SHORT /* #pragma no_long_calls is in effect. */
4742 static arm_pragma_enum arm_pragma_long_calls = OFF;
4745 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4747 arm_pragma_long_calls = LONG;
4751 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4753 arm_pragma_long_calls = SHORT;
4757 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4759 arm_pragma_long_calls = OFF;
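/* Typical use in source code (illustrative only):

     #pragma long_calls
     void far_away (void);        // calls get the 32-bit sequence
     #pragma long_calls_off

   The three handlers above merely record which region of the file the
   front end is currently parsing. */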
4762 /* Handle an attribute requiring a FUNCTION_DECL;
4763 arguments as in struct attribute_spec.handler. */
4765 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4766 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4768 if (TREE_CODE (*node) != FUNCTION_DECL)
4770 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4772 *no_add_attrs = true;
4778 /* Handle an "interrupt" or "isr" attribute;
4779 arguments as in struct attribute_spec.handler. */
4781 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4786 if (TREE_CODE (*node) != FUNCTION_DECL)
4788 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4790 *no_add_attrs = true;
4792 /* FIXME: the argument, if any, is checked for type attributes;
4793 should it be checked for decl ones? */
4797 if (TREE_CODE (*node) == FUNCTION_TYPE
4798 || TREE_CODE (*node) == METHOD_TYPE)
4800 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4802 warning (OPT_Wattributes, "%qE attribute ignored",
4804 *no_add_attrs = true;
4807 else if (TREE_CODE (*node) == POINTER_TYPE
4808 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4809 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4810 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4812 *node = build_variant_type_copy (*node);
4813 TREE_TYPE (*node) = build_type_attribute_variant
4815 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4816 *no_add_attrs = true;
4820 /* Possibly pass this attribute on from the type to a decl. */
4821 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4822 | (int) ATTR_FLAG_FUNCTION_NEXT
4823 | (int) ATTR_FLAG_ARRAY_NEXT))
4825 *no_add_attrs = true;
4826 return tree_cons (name, args, NULL_TREE);
4830 warning (OPT_Wattributes, "%qE attribute ignored",
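/* A user-level example of what the handler above accepts
   (illustrative):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   A string that arm_isr_value does not recognize yields
   ARM_FT_UNKNOWN, and the attribute is dropped with a warning. */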
4839 /* Handle a "pcs" attribute; arguments as in struct
4840 attribute_spec.handler. */
4842 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4843 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4845 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4847 warning (OPT_Wattributes, "%qE attribute ignored", name);
4848 *no_add_attrs = true;
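/* E.g. (illustrative):

     double dot (float, float) __attribute__ ((pcs ("aapcs-vfp")));

   "aapcs" and "aapcs-vfp" are the argument strings that
   arm_pcs_from_attribute accepts; anything else warns and is
   ignored. */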
4853 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4854 /* Handle the "notshared" attribute. This attribute is another way of
4855 requesting hidden visibility. ARM's compiler supports
4856 "__declspec(notshared)"; we support the same thing via an
4860 arm_handle_notshared_attribute (tree *node,
4861 tree name ATTRIBUTE_UNUSED,
4862 tree args ATTRIBUTE_UNUSED,
4863 int flags ATTRIBUTE_UNUSED,
4866 tree decl = TYPE_NAME (*node);
4870 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4871 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4872 *no_add_attrs = false;
4878 /* Return 0 if the attributes for two types are incompatible, 1 if they
4879 are compatible, and 2 if they are nearly compatible (which causes a
4880 warning to be generated). */
4882 arm_comp_type_attributes (const_tree type1, const_tree type2)
4886 /* Check for mismatch of non-default calling convention. */
4887 if (TREE_CODE (type1) != FUNCTION_TYPE)
4890 /* Check for mismatched call attributes. */
4891 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4892 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4893 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4894 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4896 /* Only bother to check if an attribute is defined. */
4897 if (l1 | l2 | s1 | s2)
4899 /* If one type has an attribute, the other must have the same attribute. */
4900 if ((l1 != l2) || (s1 != s2))
4903 /* Disallow mixed attributes. */
4904 if ((l1 & s2) || (l2 & s1))
4908 /* Check for mismatched ISR attribute. */
4909 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4911 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4912 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4914 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
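/* So, for example, a plain function type and the same type marked
   __attribute__ ((long_call)) compare as incompatible here, which
   stops pointers to the two from being silently mixed. */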
4921 /* Assigns default attributes to newly defined type. This is used to
4922 set short_call/long_call attributes for function types of
4923 functions defined inside corresponding #pragma scopes. */
4925 arm_set_default_type_attributes (tree type)
4927 /* Add __attribute__ ((long_call)) to all functions when inside
4928 #pragma long_calls, or __attribute__ ((short_call)) when inside
4929 #pragma no_long_calls. */
4930 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4932 tree type_attr_list, attr_name;
4933 type_attr_list = TYPE_ATTRIBUTES (type);
4935 if (arm_pragma_long_calls == LONG)
4936 attr_name = get_identifier ("long_call");
4937 else if (arm_pragma_long_calls == SHORT)
4938 attr_name = get_identifier ("short_call");
4942 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4943 TYPE_ATTRIBUTES (type) = type_attr_list;
4947 /* Return true if DECL is known to be linked into section SECTION. */
4950 arm_function_in_section_p (tree decl, section *section)
4952 /* We can only be certain about functions defined in the same
4953 compilation unit. */
4954 if (!TREE_STATIC (decl))
4957 /* Make sure that SYMBOL always binds to the definition in this
4958 compilation unit. */
4959 if (!targetm.binds_local_p (decl))
4962 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4963 if (!DECL_SECTION_NAME (decl))
4965 /* Make sure that we will not create a unique section for DECL. */
4966 if (flag_function_sections || DECL_ONE_ONLY (decl))
4970 return function_section (decl) == section;
4973 /* Return nonzero if a 32-bit "long_call" should be generated for
4974 a call from the current function to DECL. We generate a long_call
4977 a. has an __attribute__ ((long_call))
4978 or b. is within the scope of a #pragma long_calls
4979 or c. the -mlong-calls command line switch has been specified
4981 However we do not generate a long call if the function:
4983 d. has an __attribute__ ((short_call))
4984 or e. is inside the scope of a #pragma no_long_calls
4985 or f. is defined in the same section as the current function. */
4988 arm_is_long_call_p (tree decl)
4993 return TARGET_LONG_CALLS;
4995 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4996 if (lookup_attribute ("short_call", attrs))
4999 /* For "f", be conservative, and only cater for cases in which the
5000 whole of the current function is placed in the same section. */
5001 if (!flag_reorder_blocks_and_partition
5002 && TREE_CODE (decl) == FUNCTION_DECL
5003 && arm_function_in_section_p (decl, current_function_section ()))
5006 if (lookup_attribute ("long_call", attrs))
5009 return TARGET_LONG_CALLS;
5012 /* Return nonzero if it is ok to make a tail-call to DECL. */
5014 arm_function_ok_for_sibcall (tree decl, tree exp)
5016 unsigned long func_type;
5018 if (cfun->machine->sibcall_blocked)
5021 /* Never tailcall something for which we have no decl, or if we
5022 are generating code for Thumb-1. */
5023 if (decl == NULL || TARGET_THUMB1)
5026 /* The PIC register is live on entry to VxWorks PLT entries, so we
5027 must make the call before restoring the PIC register. */
5028 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5031 /* Cannot tail-call to long calls, since these are out of range of
5032 a branch instruction. */
5033 if (arm_is_long_call_p (decl))
5036 /* If we are interworking and the function is not declared static
5037 then we can't tail-call it unless we know that it exists in this
5038 compilation unit (since it might be a Thumb routine). */
5039 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5042 func_type = arm_current_func_type ();
5043 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5044 if (IS_INTERRUPT (func_type))
5047 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5049 /* Check that the return value locations are the same. For
5050 example that we aren't returning a value from the sibling in
5051 a VFP register but then need to transfer it to a core register. */
5055 a = arm_function_value (TREE_TYPE (exp), decl, false);
5056 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5058 if (!rtx_equal_p (a, b))
5062 /* Never tailcall if function may be called with a misaligned SP. */
5063 if (IS_STACKALIGN (func_type))
5066 /* Everything else is ok. */
5071 /* Addressing mode support functions. */
5073 /* Return nonzero if X is a legitimate immediate operand when compiling
5074 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5076 legitimate_pic_operand_p (rtx x)
5078 if (GET_CODE (x) == SYMBOL_REF
5079 || (GET_CODE (x) == CONST
5080 && GET_CODE (XEXP (x, 0)) == PLUS
5081 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5087 /* Record that the current function needs a PIC register. Initialize
5088 cfun->machine->pic_reg if we have not already done so. */
5091 require_pic_register (void)
5093 /* A lot of the logic here is made obscure by the fact that this
5094 routine gets called as part of the rtx cost estimation process.
5095 We don't want those calls to affect any assumptions about the real
5096 function; and further, we can't call entry_of_function() until we
5097 start the real expansion process. */
5098 if (!crtl->uses_pic_offset_table)
5100 gcc_assert (can_create_pseudo_p ());
5101 if (arm_pic_register != INVALID_REGNUM)
5103 if (!cfun->machine->pic_reg)
5104 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5106 /* Play games to avoid marking the function as needing pic
5107 if we are being called as part of the cost-estimation process. */
5109 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5110 crtl->uses_pic_offset_table = 1;
5116 if (!cfun->machine->pic_reg)
5117 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5119 /* Play games to avoid marking the function as needing pic
5121 if we are being called as part of the cost-estimation process. */
5122 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5124 crtl->uses_pic_offset_table = 1;
5127 arm_load_pic_register (0UL);
5131 /* We can be called during expansion of PHI nodes, where
5132 we can't yet emit instructions directly in the final
5133 insn stream. Queue the insns on the entry edge; they will
5134 be committed after everything else is expanded. */
5135 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5142 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5144 if (GET_CODE (orig) == SYMBOL_REF
5145 || GET_CODE (orig) == LABEL_REF)
5151 gcc_assert (can_create_pseudo_p ());
5152 reg = gen_reg_rtx (Pmode);
5155 /* VxWorks does not impose a fixed gap between segments; the run-time
5156 gap can be different from the object-file gap. We therefore can't
5157 use GOTOFF unless we are absolutely sure that the symbol is in the
5158 same segment as the GOT. Unfortunately, the flexibility of linker
5159 scripts means that we can't be sure of that in general, so assume
5160 that GOTOFF is never valid on VxWorks. */
5161 if ((GET_CODE (orig) == LABEL_REF
5162 || (GET_CODE (orig) == SYMBOL_REF
5163 && SYMBOL_REF_LOCAL_P (orig)))
5165 && !TARGET_VXWORKS_RTP)
5166 insn = arm_pic_static_addr (orig, reg);
5172 /* If this function doesn't have a pic register, create one now. */
5173 require_pic_register ();
5175 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5177 /* Make the MEM as close to a constant as possible. */
5178 mem = SET_SRC (pat);
5179 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5180 MEM_READONLY_P (mem) = 1;
5181 MEM_NOTRAP_P (mem) = 1;
5183 insn = emit_insn (pat);
5186 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5188 set_unique_reg_note (insn, REG_EQUAL, orig);
5192 else if (GET_CODE (orig) == CONST)
5196 if (GET_CODE (XEXP (orig, 0)) == PLUS
5197 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5200 /* Handle the case where we have: const (UNSPEC_TLS). */
5201 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5202 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5205 /* Handle the case where we have:
5206 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5208 if (GET_CODE (XEXP (orig, 0)) == PLUS
5209 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5210 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5212 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5218 gcc_assert (can_create_pseudo_p ());
5219 reg = gen_reg_rtx (Pmode);
5222 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5224 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5225 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5226 base == reg ? 0 : reg);
5228 if (GET_CODE (offset) == CONST_INT)
5230 /* The base register doesn't really matter; we only want to
5231 test the index for the appropriate mode. */
5232 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5234 gcc_assert (can_create_pseudo_p ());
5235 offset = force_reg (Pmode, offset);
5238 if (GET_CODE (offset) == CONST_INT)
5239 return plus_constant (base, INTVAL (offset));
5242 if (GET_MODE_SIZE (mode) > 4
5243 && (GET_MODE_CLASS (mode) == MODE_INT
5244 || TARGET_SOFT_FLOAT))
5246 emit_insn (gen_addsi3 (reg, base, offset));
5250 return gen_rtx_PLUS (Pmode, base, offset);
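/* Schematically, the non-local case above turns a global access under
   -fpic into "load the symbol's GOT offset from the literal pool,
   then load [pic_reg + offset]", with the REG_EQUAL note letting
   later passes still see the original symbol. */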
5257 /* Find a spare register to use during the prolog of a function. */
5260 thumb_find_work_register (unsigned long pushed_regs_mask)
5264 /* Check the argument registers first as these are call-used. The
5265 register allocation order means that sometimes r3 might be used
5266 but earlier argument registers might not, so check them all. */
5267 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5268 if (!df_regs_ever_live_p (reg))
5271 /* Before going on to check the call-saved registers we can try a couple
5272 more ways of deducing that r3 is available. The first is when we are
5273 pushing anonymous arguments onto the stack and we have fewer than 4
5274 registers' worth of fixed arguments (*). In this case r3 will be part of
5275 the variable argument list and so we can be sure that it will be
5276 pushed right at the start of the function. Hence it will be available
5277 for the rest of the prologue.
5278 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5279 if (cfun->machine->uses_anonymous_args
5280 && crtl->args.pretend_args_size > 0)
5281 return LAST_ARG_REGNUM;
5283 /* The other case is when we have fixed arguments but fewer than 4 registers'
5284 worth. In this case r3 might be used in the body of the function, but
5285 it is not being used to convey an argument into the function. In theory
5286 we could just check crtl->args.size to see how many bytes are
5287 being passed in argument registers, but it seems to be unreliable.
5288 Sometimes it will have the value 0 when in fact arguments are being
5289 passed. (See testcase execute/20021111-1.c for an example.) So we
5290 also check the args_info.nregs field. The problem with this field is
5291 that it makes no allowances for arguments that are passed to the
5292 function but are not used. Hence we could miss an opportunity
5293 when a function has an unused argument in r3. But it is better to be
5294 safe than sorry. */
5295 if (! cfun->machine->uses_anonymous_args
5296 && crtl->args.size >= 0
5297 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5298 && crtl->args.info.nregs < 4)
5299 return LAST_ARG_REGNUM;
5301 /* Otherwise look for a call-saved register that is going to be pushed. */
5302 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5303 if (pushed_regs_mask & (1 << reg))
5308 /* Thumb-2 can use high regs. */
5309 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5310 if (pushed_regs_mask & (1 << reg))
5313 /* Something went wrong - thumb_compute_save_reg_mask()
5314 should have arranged for a suitable register to be pushed. */
5318 static GTY(()) int pic_labelno;
5320 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5324 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5326 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5328 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5331 gcc_assert (flag_pic);
5333 pic_reg = cfun->machine->pic_reg;
5334 if (TARGET_VXWORKS_RTP)
5336 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5337 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5338 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5340 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5342 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5343 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5347 /* We use an UNSPEC rather than a LABEL_REF because this label
5348 never appears in the code stream. */
5350 labelno = GEN_INT (pic_labelno++);
5351 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5352 l1 = gen_rtx_CONST (VOIDmode, l1);
5354 /* On the ARM the PC register contains 'dot + 8' at the time of the
5355 addition; on the Thumb it is 'dot + 4'. */
5356 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5357 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5359 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5363 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5365 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5367 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5369 else /* TARGET_THUMB1 */
5371 if (arm_pic_register != INVALID_REGNUM
5372 && REGNO (pic_reg) > LAST_LO_REGNUM)
5374 /* We will have pushed the pic register, so we should always be
5375 able to find a work register. */
5376 pic_tmp = gen_rtx_REG (SImode,
5377 thumb_find_work_register (saved_regs));
5378 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5379 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5382 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5383 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5387 /* Need to emit this whether or not we obey regdecls,
5388 since setjmp/longjmp can cause life info to screw up. */
5392 /* Generate code to load the address of a static var when flag_pic is set. */
5394 arm_pic_static_addr (rtx orig, rtx reg)
5396 rtx l1, labelno, offset_rtx, insn;
5398 gcc_assert (flag_pic);
5400 /* We use an UNSPEC rather than a LABEL_REF because this label
5401 never appears in the code stream. */
5402 labelno = GEN_INT (pic_labelno++);
5403 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5404 l1 = gen_rtx_CONST (VOIDmode, l1);
5406 /* On the ARM the PC register contains 'dot + 8' at the time of the
5407 addition; on the Thumb it is 'dot + 4'. */
5408 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5409 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5410 UNSPEC_SYMBOL_OFFSET);
5411 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5415 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5417 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5419 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5421 else /* TARGET_THUMB1 */
5423 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5424 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5430 /* Return nonzero if X is valid as an ARM state addressing register. */
5432 arm_address_register_rtx_p (rtx x, int strict_p)
5436 if (GET_CODE (x) != REG)
5442 return ARM_REGNO_OK_FOR_BASE_P (regno);
5444 return (regno <= LAST_ARM_REGNUM
5445 || regno >= FIRST_PSEUDO_REGISTER
5446 || regno == FRAME_POINTER_REGNUM
5447 || regno == ARG_POINTER_REGNUM);
5450 /* Return TRUE if this rtx is the difference of a symbol and a label,
5451 and will reduce to a PC-relative relocation in the object file.
5452 Expressions like this can be left alone when generating PIC, rather
5453 than forced through the GOT. */
5455 pcrel_constant_p (rtx x)
5457 if (GET_CODE (x) == MINUS)
5458 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
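/* E.g. (minus (symbol_ref "x") (label_ref Ly)) assembles to the fixed
   distance "x - .Ly", a link-time constant, so it need not be routed
   through the GOT even when generating PIC. */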
5463 /* Return true if X will surely end up in an index register after next
5466 will_be_in_index_register (const_rtx x)
5468 /* arm.md: calculate_pic_address will split this into a register. */
5469 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5472 /* Return nonzero if X is a valid ARM state address operand. */
5474 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5478 enum rtx_code code = GET_CODE (x);
5480 if (arm_address_register_rtx_p (x, strict_p))
5483 use_ldrd = (TARGET_LDRD
5485 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5487 if (code == POST_INC || code == PRE_DEC
5488 || ((code == PRE_INC || code == POST_DEC)
5489 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5490 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5492 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5493 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5494 && GET_CODE (XEXP (x, 1)) == PLUS
5495 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5497 rtx addend = XEXP (XEXP (x, 1), 1);
5499 /* Don't allow ldrd post-increment by register because it's hard
5500 to fix up invalid register choices. */
5502 && GET_CODE (x) == POST_MODIFY
5503 && GET_CODE (addend) == REG)
5506 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5507 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5510 /* After reload constants split into minipools will have addresses
5511 from a LABEL_REF. */
5512 else if (reload_completed
5513 && (code == LABEL_REF
5515 && GET_CODE (XEXP (x, 0)) == PLUS
5516 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5517 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5520 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5523 else if (code == PLUS)
5525 rtx xop0 = XEXP (x, 0);
5526 rtx xop1 = XEXP (x, 1);
5528 return ((arm_address_register_rtx_p (xop0, strict_p)
5529 && ((GET_CODE(xop1) == CONST_INT
5530 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5531 || (!strict_p && will_be_in_index_register (xop1))))
5532 || (arm_address_register_rtx_p (xop1, strict_p)
5533 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5537 /* Reload currently can't handle MINUS, so disable this for now. */
5538 else if (GET_CODE (x) == MINUS)
5540 rtx xop0 = XEXP (x, 0);
5541 rtx xop1 = XEXP (x, 1);
5543 return (arm_address_register_rtx_p (xop0, strict_p)
5544 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5548 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5549 && code == SYMBOL_REF
5550 && CONSTANT_POOL_ADDRESS_P (x)
5552 && symbol_mentioned_p (get_pool_constant (x))
5553 && ! pcrel_constant_p (get_pool_constant (x))))
5559 /* Return nonzero if X is a valid Thumb-2 address operand. */
5561 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5564 enum rtx_code code = GET_CODE (x);
5566 if (arm_address_register_rtx_p (x, strict_p))
5569 use_ldrd = (TARGET_LDRD
5571 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5573 if (code == POST_INC || code == PRE_DEC
5574 || ((code == PRE_INC || code == POST_DEC)
5575 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5576 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5578 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5579 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5580 && GET_CODE (XEXP (x, 1)) == PLUS
5581 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5583 /* Thumb-2 only has autoincrement by constant. */
5584 rtx addend = XEXP (XEXP (x, 1), 1);
5585 HOST_WIDE_INT offset;
5587 if (GET_CODE (addend) != CONST_INT)
5590 offset = INTVAL (addend);
5591 if (GET_MODE_SIZE (mode) <= 4)
5592 return (offset > -256 && offset < 256);
5594 return (use_ldrd && offset > -1024 && offset < 1024
5595 && (offset & 3) == 0);
5598 /* After reload constants split into minipools will have addresses
5599 from a LABEL_REF. */
5600 else if (reload_completed
5601 && (code == LABEL_REF
5603 && GET_CODE (XEXP (x, 0)) == PLUS
5604 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5605 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5608 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5611 else if (code == PLUS)
5613 rtx xop0 = XEXP (x, 0);
5614 rtx xop1 = XEXP (x, 1);
5616 return ((arm_address_register_rtx_p (xop0, strict_p)
5617 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5618 || (!strict_p && will_be_in_index_register (xop1))))
5619 || (arm_address_register_rtx_p (xop1, strict_p)
5620 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5623 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5624 && code == SYMBOL_REF
5625 && CONSTANT_POOL_ADDRESS_P (x)
5627 && symbol_mentioned_p (get_pool_constant (x))
5628 && ! pcrel_constant_p (get_pool_constant (x))))
5634 /* Return nonzero if INDEX is valid for an address index operand in
5637 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5640 HOST_WIDE_INT range;
5641 enum rtx_code code = GET_CODE (index);
5643 /* Standard coprocessor addressing modes. */
5644 if (TARGET_HARD_FLOAT
5645 && (TARGET_FPA || TARGET_MAVERICK)
5646 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5647 || (TARGET_MAVERICK && mode == DImode)))
5648 return (code == CONST_INT && INTVAL (index) < 1024
5649 && INTVAL (index) > -1024
5650 && (INTVAL (index) & 3) == 0);
5652 /* For quad modes, we restrict the constant offset to be slightly less
5653 than what the instruction format permits. We do this because for
5654 quad mode moves, we will actually decompose them into two separate
5655 double-mode reads or writes. INDEX must therefore be a valid
5656 (double-mode) offset and so should INDEX+8. */
5657 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5658 return (code == CONST_INT
5659 && INTVAL (index) < 1016
5660 && INTVAL (index) > -1024
5661 && (INTVAL (index) & 3) == 0);
5663 /* We have no such constraint on double mode offsets, so we permit the
5664 full range of the instruction format. */
5665 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5666 return (code == CONST_INT
5667 && INTVAL (index) < 1024
5668 && INTVAL (index) > -1024
5669 && (INTVAL (index) & 3) == 0);
5671 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5672 return (code == CONST_INT
5673 && INTVAL (index) < 1024
5674 && INTVAL (index) > -1024
5675 && (INTVAL (index) & 3) == 0);
5677 if (arm_address_register_rtx_p (index, strict_p)
5678 && (GET_MODE_SIZE (mode) <= 4))
5681 if (mode == DImode || mode == DFmode)
5683 if (code == CONST_INT)
5685 HOST_WIDE_INT val = INTVAL (index);
5688 return val > -256 && val < 256;
5690 return val > -4096 && val < 4092;
5693 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5696 if (GET_MODE_SIZE (mode) <= 4
5700 || (mode == QImode && outer == SIGN_EXTEND))))
5704 rtx xiop0 = XEXP (index, 0);
5705 rtx xiop1 = XEXP (index, 1);
5707 return ((arm_address_register_rtx_p (xiop0, strict_p)
5708 && power_of_two_operand (xiop1, SImode))
5709 || (arm_address_register_rtx_p (xiop1, strict_p)
5710 && power_of_two_operand (xiop0, SImode)));
5712 else if (code == LSHIFTRT || code == ASHIFTRT
5713 || code == ASHIFT || code == ROTATERT)
5715 rtx op = XEXP (index, 1);
5717 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5718 && GET_CODE (op) == CONST_INT
5720 && INTVAL (op) <= 31);
5724 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5730 || (outer == SIGN_EXTEND && mode == QImode))
5736 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5738 return (code == CONST_INT
5739 && INTVAL (index) < range
5740 && INTVAL (index) > -range);
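/* Addresses the checks above accept for SImode include, e.g.,

     ldr r0, [r1, #4095]        @ 12-bit unsigned immediate
     ldr r0, [r1, r2, lsl #2]   @ scaled register index

   (illustrative assembly; halfword and sign-extending accesses get
   the smaller ranges computed at the end). */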
5743 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5744 index operand, i.e. 1, 2, 4 or 8. */
5746 thumb2_index_mul_operand (rtx op)
5750 if (GET_CODE (op) != CONST_INT)
5754 return (val == 1 || val == 2 || val == 4 || val == 8);
5757 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5759 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5761 enum rtx_code code = GET_CODE (index);
5763 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5764 /* Standard coprocessor addressing modes. */
5765 if (TARGET_HARD_FLOAT
5766 && (TARGET_FPA || TARGET_MAVERICK)
5767 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5768 || (TARGET_MAVERICK && mode == DImode)))
5769 return (code == CONST_INT && INTVAL (index) < 1024
5770 && INTVAL (index) > -1024
5771 && (INTVAL (index) & 3) == 0);
5773 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5775 /* For DImode assume values will usually live in core regs
5776 and only allow LDRD addressing modes. */
5777 if (!TARGET_LDRD || mode != DImode)
5778 return (code == CONST_INT
5779 && INTVAL (index) < 1024
5780 && INTVAL (index) > -1024
5781 && (INTVAL (index) & 3) == 0);
5784 /* For quad modes, we restrict the constant offset to be slightly less
5785 than what the instruction format permits. We do this because for
5786 quad mode moves, we will actually decompose them into two separate
5787 double-mode reads or writes. INDEX must therefore be a valid
5788 (double-mode) offset and so should INDEX+8. */
5789 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5790 return (code == CONST_INT
5791 && INTVAL (index) < 1016
5792 && INTVAL (index) > -1024
5793 && (INTVAL (index) & 3) == 0);
5795 /* We have no such constraint on double mode offsets, so we permit the
5796 full range of the instruction format. */
5797 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5798 return (code == CONST_INT
5799 && INTVAL (index) < 1024
5800 && INTVAL (index) > -1024
5801 && (INTVAL (index) & 3) == 0);
5803 if (arm_address_register_rtx_p (index, strict_p)
5804 && (GET_MODE_SIZE (mode) <= 4))
5807 if (mode == DImode || mode == DFmode)
5809 if (code == CONST_INT)
5811 HOST_WIDE_INT val = INTVAL (index);
5812 /* ??? Can we assume ldrd for thumb2? */
5813 /* Thumb-2 ldrd only has reg+const addressing modes. */
5814 /* ldrd supports offsets of +-1020.
5815 However the ldr fallback does not. */
5816 return val > -256 && val < 256 && (val & 3) == 0;
5824 rtx xiop0 = XEXP (index, 0);
5825 rtx xiop1 = XEXP (index, 1);
5827 return ((arm_address_register_rtx_p (xiop0, strict_p)
5828 && thumb2_index_mul_operand (xiop1))
5829 || (arm_address_register_rtx_p (xiop1, strict_p)
5830 && thumb2_index_mul_operand (xiop0)));
5832 else if (code == ASHIFT)
5834 rtx op = XEXP (index, 1);
5836 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5837 && GET_CODE (op) == CONST_INT
5839 && INTVAL (op) <= 3);
5842 return (code == CONST_INT
5843 && INTVAL (index) < 4096
5844 && INTVAL (index) > -256);
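/* I.e. Thumb-2 word accesses accept, for example,

     ldr r0, [r1, #4095]        @ positive 12-bit offset
     ldr r0, [r1, #-255]        @ negative 8-bit offset
     ldr r0, [r1, r2, lsl #3]   @ shift amount of at most 3

   (illustrative assembly). */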
5847 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5849 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5853 if (GET_CODE (x) != REG)
5859 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5861 return (regno <= LAST_LO_REGNUM
5862 || regno > LAST_VIRTUAL_REGISTER
5863 || regno == FRAME_POINTER_REGNUM
5864 || (GET_MODE_SIZE (mode) >= 4
5865 && (regno == STACK_POINTER_REGNUM
5866 || regno >= FIRST_PSEUDO_REGISTER
5867 || x == hard_frame_pointer_rtx
5868 || x == arg_pointer_rtx)));
5871 /* Return nonzero if x is a legitimate index register. This is the case
5872 for any base register that can access a QImode object. */
5874 thumb1_index_register_rtx_p (rtx x, int strict_p)
5876 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5879 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5881 The AP may be eliminated to either the SP or the FP, so we use the
5882 least common denominator, e.g. SImode, and offsets from 0 to 64.
5884 ??? Verify whether the above is the right approach.
5886 ??? Also, the FP may be eliminated to the SP, so perhaps that
5887 needs special handling also.
5889 ??? Look at how the mips16 port solves this problem. It probably uses
5890 better ways to solve some of these problems.
5892 Although it is not incorrect, we don't accept QImode and HImode
5893 addresses based on the frame pointer or arg pointer until the
5894 reload pass starts. This is so that eliminating such addresses
5895 into stack based ones won't produce impossible code. */
5897 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5899 /* ??? Not clear if this is right. Experiment. */
5900 if (GET_MODE_SIZE (mode) < 4
5901 && !(reload_in_progress || reload_completed)
5902 && (reg_mentioned_p (frame_pointer_rtx, x)
5903 || reg_mentioned_p (arg_pointer_rtx, x)
5904 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5905 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5906 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5907 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5910 /* Accept any base register. SP only in SImode or larger. */
5911 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5914 /* This is PC relative data before arm_reorg runs. */
5915 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5916 && GET_CODE (x) == SYMBOL_REF
5917 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5920 /* This is PC relative data after arm_reorg runs. */
5921 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5923 && (GET_CODE (x) == LABEL_REF
5924 || (GET_CODE (x) == CONST
5925 && GET_CODE (XEXP (x, 0)) == PLUS
5926 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5927 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5930 /* Post-inc indexing only supported for SImode and larger. */
5931 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5932 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5935 else if (GET_CODE (x) == PLUS)
5937 /* REG+REG address can be any two index registers. */
5938 /* We disallow FRAME+REG addressing since we know that FRAME
5939 will be replaced with STACK, and SP relative addressing only
5940 permits SP+OFFSET. */
5941 if (GET_MODE_SIZE (mode) <= 4
5942 && XEXP (x, 0) != frame_pointer_rtx
5943 && XEXP (x, 1) != frame_pointer_rtx
5944 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5945 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5946 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5949 /* REG+const has 5-7 bit offset for non-SP registers. */
5950 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5951 || XEXP (x, 0) == arg_pointer_rtx)
5952 && GET_CODE (XEXP (x, 1)) == CONST_INT
5953 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5956 /* REG+const has 10-bit offset for SP, but only SImode and
5957 larger are supported. */
5958 /* ??? Should probably check for DI/DFmode overflow here
5959 just like GO_IF_LEGITIMATE_OFFSET does. */
5960 else if (GET_CODE (XEXP (x, 0)) == REG
5961 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5962 && GET_MODE_SIZE (mode) >= 4
5963 && GET_CODE (XEXP (x, 1)) == CONST_INT
5964 && INTVAL (XEXP (x, 1)) >= 0
5965 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5966 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5969 else if (GET_CODE (XEXP (x, 0)) == REG
5970 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5971 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5972 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5973 && REGNO (XEXP (x, 0))
5974 <= LAST_VIRTUAL_POINTER_REGISTER))
5975 && GET_MODE_SIZE (mode) >= 4
5976 && GET_CODE (XEXP (x, 1)) == CONST_INT
5977 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5981 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5982 && GET_MODE_SIZE (mode) == 4
5983 && GET_CODE (x) == SYMBOL_REF
5984 && CONSTANT_POOL_ADDRESS_P (x)
5986 && symbol_mentioned_p (get_pool_constant (x))
5987 && ! pcrel_constant_p (get_pool_constant (x))))
5993 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5994 instruction of mode MODE. */
5996 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5998 switch (GET_MODE_SIZE (mode))
6001 return val >= 0 && val < 32;
6004 return val >= 0 && val < 64 && (val & 1) == 0;
6008 && (val + GET_MODE_SIZE (mode)) <= 128
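/* These bounds mirror the Thumb-1 load/store encodings, whose 5-bit
   offset field is scaled by the access size: byte accesses reach
   #0-#31, halfwords #0-#62, and words #0-#124 (illustrative). */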
6014 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6017 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6018 else if (TARGET_THUMB2)
6019 return thumb2_legitimate_address_p (mode, x, strict_p);
6020 else /* if (TARGET_THUMB1) */
6021 return thumb1_legitimate_address_p (mode, x, strict_p);
6024 /* Build the SYMBOL_REF for __tls_get_addr. */
6026 static GTY(()) rtx tls_get_addr_libfunc;
6029 get_tls_get_addr (void)
6031 if (!tls_get_addr_libfunc)
6032 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6033 return tls_get_addr_libfunc;
6037 arm_load_tp (rtx target)
6040 target = gen_reg_rtx (SImode);
6044 /* Can return in any reg. */
6045 emit_insn (gen_load_tp_hard (target));
6049 /* Always returned in r0. Immediately copy the result into a pseudo,
6050 otherwise other uses of r0 (e.g. setting up function arguments) may
6051 clobber the value. */
6055 emit_insn (gen_load_tp_soft ());
6057 tmp = gen_rtx_REG (SImode, 0);
6058 emit_move_insn (target, tmp);
6064 load_tls_operand (rtx x, rtx reg)
6068 if (reg == NULL_RTX)
6069 reg = gen_reg_rtx (SImode);
6071 tmp = gen_rtx_CONST (SImode, x);
6073 emit_move_insn (reg, tmp);
6079 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6081 rtx insns, label, labelno, sum;
6085 labelno = GEN_INT (pic_labelno++);
6086 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6087 label = gen_rtx_CONST (VOIDmode, label);
6089 sum = gen_rtx_UNSPEC (Pmode,
6090 gen_rtvec (4, x, GEN_INT (reloc), label,
6091 GEN_INT (TARGET_ARM ? 8 : 4)),
6093 reg = load_tls_operand (sum, reg);
6096 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6097 else if (TARGET_THUMB2)
6098 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6099 else /* TARGET_THUMB1 */
6100 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6102 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6103 Pmode, 1, reg, Pmode);
6105 insns = get_insns ();
6112 legitimize_tls_address (rtx x, rtx reg)
6114 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6115 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6119 case TLS_MODEL_GLOBAL_DYNAMIC:
6120 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6121 dest = gen_reg_rtx (Pmode);
6122 emit_libcall_block (insns, dest, ret, x);
6125 case TLS_MODEL_LOCAL_DYNAMIC:
6126 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6128 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6129 share the LDM result with other LD model accesses. */
6130 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6132 dest = gen_reg_rtx (Pmode);
6133 emit_libcall_block (insns, dest, ret, eqv);
6135 /* Load the addend. */
6136 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6138 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6139 return gen_rtx_PLUS (Pmode, dest, addend);
6141 case TLS_MODEL_INITIAL_EXEC:
6142 labelno = GEN_INT (pic_labelno++);
6143 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6144 label = gen_rtx_CONST (VOIDmode, label);
6145 sum = gen_rtx_UNSPEC (Pmode,
6146 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6147 GEN_INT (TARGET_ARM ? 8 : 4)),
6149 reg = load_tls_operand (sum, reg);
6152 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6153 else if (TARGET_THUMB2)
6154 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6157 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6158 emit_move_insn (reg, gen_const_mem (SImode, reg));
6161 tp = arm_load_tp (NULL_RTX);
6163 return gen_rtx_PLUS (Pmode, tp, reg);
6165 case TLS_MODEL_LOCAL_EXEC:
6166 tp = arm_load_tp (NULL_RTX);
6168 reg = gen_rtx_UNSPEC (Pmode,
6169 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6171 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6173 return gen_rtx_PLUS (Pmode, tp, reg);
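/* User code reaching these cases looks like (illustrative):

     static __thread int counter;

   A non-PIC reference to such a variable typically takes the
   local-exec path; references to __thread variables that may live in
   another shared object take the initial-exec or dynamic models
   above instead. */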
6180 /* Try machine-dependent ways of modifying an illegitimate address
6181 to be legitimate. If we find one, return the new, valid address. */
6183 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6187 /* TODO: legitimize_address for Thumb2. */
6190 return thumb_legitimize_address (x, orig_x, mode);
6193 if (arm_tls_symbol_p (x))
6194 return legitimize_tls_address (x, NULL_RTX);
6196 if (GET_CODE (x) == PLUS)
6198 rtx xop0 = XEXP (x, 0);
6199 rtx xop1 = XEXP (x, 1);
6201 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6202 xop0 = force_reg (SImode, xop0);
6204 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6205 xop1 = force_reg (SImode, xop1);
6207 if (ARM_BASE_REGISTER_RTX_P (xop0)
6208 && GET_CODE (xop1) == CONST_INT)
6210 HOST_WIDE_INT n, low_n;
6214 /* VFP addressing modes actually allow greater offsets, but for
6215 now we just stick with the lowest common denominator. */
6217 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6229 low_n = ((mode) == TImode ? 0
6230 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6234 base_reg = gen_reg_rtx (SImode);
6235 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6236 emit_move_insn (base_reg, val);
6237 x = plus_constant (base_reg, low_n);
6239 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6240 x = gen_rtx_PLUS (SImode, xop0, xop1);
6243 /* XXX We don't allow MINUS any more -- see comment in
6244 arm_legitimate_address_outer_p (). */
6245 else if (GET_CODE (x) == MINUS)
6247 rtx xop0 = XEXP (x, 0);
6248 rtx xop1 = XEXP (x, 1);
6250 if (CONSTANT_P (xop0))
6251 xop0 = force_reg (SImode, xop0);
6253 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6254 xop1 = force_reg (SImode, xop1);
6256 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6257 x = gen_rtx_MINUS (SImode, xop0, xop1);
6260 /* Make sure to take full advantage of the pre-indexed addressing mode
6261 with absolute addresses which often allows for the base register to
6262 be factorized for multiple adjacent memory references, and it might
6263 even allow for the minipool to be avoided entirely. */
6264 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6267 HOST_WIDE_INT mask, base, index;
6270 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6271 use an 8-bit index. So let's use a 12-bit index for SImode only and
6272 hope that arm_gen_constant will enable ldrb to use more bits. */
6273 bits = (mode == SImode) ? 12 : 8;
6274 mask = (1 << bits) - 1;
6275 base = INTVAL (x) & ~mask;
6276 index = INTVAL (x) & mask;
6277 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6279 /* It'll most probably be more efficient to generate the base
6280 with more bits set and use a negative index instead. */
6284 base_reg = force_reg (SImode, GEN_INT (base));
6285 x = plus_constant (base_reg, index);
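/* E.g. the SImode constant address 0x12345678 splits into a base of
   0x12345000 plus index #0x678 here, so neighbouring absolute
   accesses can share one base register and skip the minipool. */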
6290 /* We need to find and carefully transform any SYMBOL and LABEL
6291 references; so go back to the original address expression. */
6292 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6294 if (new_x != orig_x)
6302 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6303 to be legitimate. If we find one, return the new, valid address. */
6305 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6307 if (arm_tls_symbol_p (x))
6308 return legitimize_tls_address (x, NULL_RTX);
6310 if (GET_CODE (x) == PLUS
6311 && GET_CODE (XEXP (x, 1)) == CONST_INT
6312 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6313 || INTVAL (XEXP (x, 1)) < 0))
6315 rtx xop0 = XEXP (x, 0);
6316 rtx xop1 = XEXP (x, 1);
6317 HOST_WIDE_INT offset = INTVAL (xop1);
6319 /* Try and fold the offset into a biasing of the base register and
6320 then offsetting that. Don't do this when optimizing for space
6321 since it can cause too many CSEs. */
6322 if (optimize_size && offset >= 0
6323 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6325 HOST_WIDE_INT delta;
6328 delta = offset - (256 - GET_MODE_SIZE (mode));
6329 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6330 delta = 31 * GET_MODE_SIZE (mode);
6332 delta = offset & (~31 * GET_MODE_SIZE (mode));
6334 xop0 = force_operand (plus_constant (xop0, offset - delta),
6336 x = plus_constant (xop0, delta);
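	  /* Worked example, a sketch assuming SImode (GET_MODE_SIZE == 4):
	     for reg + 300 we have offset >= 256, so delta = 300 - (256 - 4)
	     = 48; the base register is biased by 252 and the residual offset
	     48 fits the scaled 5-bit immediate of a Thumb-1 word load
	     (0..124). */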
6338 else if (offset < 0 && offset > -256)
6339 /* Small negative offsets are best done with a subtract before the
6340 dereference, forcing these into a register normally takes two
6341 instructions. */
6342 x = force_operand (x, NULL_RTX);
6345 /* For the remaining cases, force the constant into a register. */
6346 xop1 = force_reg (SImode, xop1);
6347 x = gen_rtx_PLUS (SImode, xop0, xop1);
6350 else if (GET_CODE (x) == PLUS
6351 && s_register_operand (XEXP (x, 1), SImode)
6352 && !s_register_operand (XEXP (x, 0), SImode))
6354 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6356 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6361 /* We need to find and carefully transform any SYMBOL and LABEL
6362 references; so go back to the original address expression. */
6363 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6365 if (new_x != orig_x)
6373 thumb_legitimize_reload_address (rtx *x_p,
6374 enum machine_mode mode,
6375 int opnum, int type,
6376 int ind_levels ATTRIBUTE_UNUSED)
6380 if (GET_CODE (x) == PLUS
6381 && GET_MODE_SIZE (mode) < 4
6382 && REG_P (XEXP (x, 0))
6383 && XEXP (x, 0) == stack_pointer_rtx
6384 && GET_CODE (XEXP (x, 1)) == CONST_INT
6385 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6390 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6391 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6395 /* If both registers are hi-regs, then it's better to reload the
6396 entire expression rather than each register individually. That
6397 only requires one reload register rather than two. */
6398 if (GET_CODE (x) == PLUS
6399 && REG_P (XEXP (x, 0))
6400 && REG_P (XEXP (x, 1))
6401 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6402 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6407 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6408 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6415 /* Test for various thread-local symbols. */
6417 /* Return TRUE if X is a thread-local symbol. */
6420 arm_tls_symbol_p (rtx x)
6422 if (! TARGET_HAVE_TLS)
6425 if (GET_CODE (x) != SYMBOL_REF)
6428 return SYMBOL_REF_TLS_MODEL (x) != 0;
6431 /* Helper for arm_tls_referenced_p. */
6434 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6436 if (GET_CODE (*x) == SYMBOL_REF)
6437 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6439 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6440 TLS offsets, not real symbol references. */
6441 if (GET_CODE (*x) == UNSPEC
6442 && XINT (*x, 1) == UNSPEC_TLS)
6448 /* Return TRUE if X contains any TLS symbol references. */
6451 arm_tls_referenced_p (rtx x)
6453 if (! TARGET_HAVE_TLS)
6456 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6459 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6462 arm_cannot_force_const_mem (rtx x)
6466 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6468 split_const (x, &base, &offset);
6469 if (GET_CODE (base) == SYMBOL_REF
6470 && !offset_within_block_p (base, INTVAL (offset)))
6473 return arm_tls_referenced_p (x);
6476 #define REG_OR_SUBREG_REG(X) \
6477 (GET_CODE (X) == REG \
6478 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6480 #define REG_OR_SUBREG_RTX(X) \
6481 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6484 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6486 enum machine_mode mode = GET_MODE (x);
6500 return COSTS_N_INSNS (1);
6503 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6506 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6513 return COSTS_N_INSNS (2) + cycles;
6515 return COSTS_N_INSNS (1) + 16;
6518 return (COSTS_N_INSNS (1)
6519 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6520 + (GET_CODE (SET_DEST (x)) == MEM)));
6525 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6527 if (thumb_shiftable_const (INTVAL (x)))
6528 return COSTS_N_INSNS (2);
6529 return COSTS_N_INSNS (3);
6531 else if ((outer == PLUS || outer == COMPARE)
6532 && INTVAL (x) < 256 && INTVAL (x) > -256)
6534 else if ((outer == IOR || outer == XOR || outer == AND)
6535 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6536 return COSTS_N_INSNS (1);
6537 else if (outer == AND)
6540 /* This duplicates the tests in the andsi3 expander. */
6541 for (i = 9; i <= 31; i++)
6542 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6543 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6544 return COSTS_N_INSNS (2);
6546 else if (outer == ASHIFT || outer == ASHIFTRT
6547 || outer == LSHIFTRT)
6549 return COSTS_N_INSNS (2);
6555 return COSTS_N_INSNS (3);
6573 /* XXX another guess. */
6574 /* Memory costs quite a lot for the first word, but subsequent words
6575 load at the equivalent of a single insn each. */
6576 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6577 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6582 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6588 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6589 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6595 return total + COSTS_N_INSNS (1);
6597 /* Assume a two-shift sequence. Increase the cost slightly so
6598 we prefer actual shifts over an extend operation. */
6599 return total + 1 + COSTS_N_INSNS (2);
6607 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6609 enum machine_mode mode = GET_MODE (x);
6610 enum rtx_code subcode;
6612 enum rtx_code code = GET_CODE (x);
6618 /* Memory costs quite a lot for the first word, but subsequent words
6619 load at the equivalent of a single insn each. */
6620 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6627 if (TARGET_HARD_FLOAT && mode == SFmode)
6628 *total = COSTS_N_INSNS (2);
6629 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6630 *total = COSTS_N_INSNS (4);
6632 *total = COSTS_N_INSNS (20);
6636 if (GET_CODE (XEXP (x, 1)) == REG)
6637 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
6638 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6639 *total = rtx_cost (XEXP (x, 1), code, speed);
6645 *total += COSTS_N_INSNS (4);
6650 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6651 *total += rtx_cost (XEXP (x, 0), code, speed);
6654 *total += COSTS_N_INSNS (3);
6658 *total += COSTS_N_INSNS (1);
6659 /* Increase the cost of complex shifts because they aren't any faster,
6660 and reduce dual issue opportunities. */
6661 if (arm_tune_cortex_a9
6662 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6670 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6671 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6672 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6674 *total += rtx_cost (XEXP (x, 1), code, speed);
6678 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6679 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6681 *total += rtx_cost (XEXP (x, 0), code, speed);
6688 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6690 if (TARGET_HARD_FLOAT
6692 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6694 *total = COSTS_N_INSNS (1);
6695 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6696 && arm_const_double_rtx (XEXP (x, 0)))
6698 *total += rtx_cost (XEXP (x, 1), code, speed);
6702 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6703 && arm_const_double_rtx (XEXP (x, 1)))
6705 *total += rtx_cost (XEXP (x, 0), code, speed);
6711 *total = COSTS_N_INSNS (20);
6715 *total = COSTS_N_INSNS (1);
6716 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6717 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6719 *total += rtx_cost (XEXP (x, 1), code, speed);
6723 subcode = GET_CODE (XEXP (x, 1));
6724 if (subcode == ASHIFT || subcode == ASHIFTRT
6725 || subcode == LSHIFTRT
6726 || subcode == ROTATE || subcode == ROTATERT)
6728 *total += rtx_cost (XEXP (x, 0), code, speed);
6729 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6733 /* A shift as a part of RSB costs no more than RSB itself. */
6734 if (GET_CODE (XEXP (x, 0)) == MULT
6735 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6737 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6738 *total += rtx_cost (XEXP (x, 1), code, speed);
6743 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6745 *total += rtx_cost (XEXP (x, 0), code, speed);
6746 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6750 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6751 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6753 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6754 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6755 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6756 *total += COSTS_N_INSNS (1);
6764 if (code == PLUS && arm_arch6 && mode == SImode
6765 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6766 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6768 *total = COSTS_N_INSNS (1);
6769 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6771 *total += rtx_cost (XEXP (x, 1), code, speed);
6775 /* MLA: All arguments must be registers. We filter out
6776 multiplication by a power of two, so that we fall down into
6777 the code below. */
6778 if (GET_CODE (XEXP (x, 0)) == MULT
6779 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6781 /* The cost comes from the cost of the multiply. */
6785 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6787 if (TARGET_HARD_FLOAT
6789 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6791 *total = COSTS_N_INSNS (1);
6792 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6793 && arm_const_double_rtx (XEXP (x, 1)))
6795 *total += rtx_cost (XEXP (x, 0), code, speed);
6802 *total = COSTS_N_INSNS (20);
6806 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6807 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6809 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6810 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6811 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6812 *total += COSTS_N_INSNS (1);
6818 case AND: case XOR: case IOR:
6820 /* Normally the frame registers will be spilt into reg+const during
6821 reload, so it is a bad idea to combine them with other instructions,
6822 since then they might not be moved outside of loops. As a compromise
6823 we allow integration with ops that have a constant as their second
6825 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6826 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6827 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6828 *total = COSTS_N_INSNS (1);
6832 *total += COSTS_N_INSNS (2);
6833 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6834 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6836 *total += rtx_cost (XEXP (x, 0), code, speed);
6843 *total += COSTS_N_INSNS (1);
6844 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6845 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6847 *total += rtx_cost (XEXP (x, 0), code, speed);
6850 subcode = GET_CODE (XEXP (x, 0));
6851 if (subcode == ASHIFT || subcode == ASHIFTRT
6852 || subcode == LSHIFTRT
6853 || subcode == ROTATE || subcode == ROTATERT)
6855 *total += rtx_cost (XEXP (x, 1), code, speed);
6856 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6861 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6863 *total += rtx_cost (XEXP (x, 1), code, speed);
6864 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6868 if (subcode == UMIN || subcode == UMAX
6869 || subcode == SMIN || subcode == SMAX)
6871 *total = COSTS_N_INSNS (3);
6878 /* This should have been handled by the CPU specific routines. */
6882 if (arm_arch3m && mode == SImode
6883 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6884 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6885 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6886 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6887 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6888 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6890 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6893 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6897 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6899 if (TARGET_HARD_FLOAT
6901 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6903 *total = COSTS_N_INSNS (1);
6906 *total = COSTS_N_INSNS (2);
6912 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6913 if (mode == SImode && code == NOT)
6915 subcode = GET_CODE (XEXP (x, 0));
6916 if (subcode == ASHIFT || subcode == ASHIFTRT
6917 || subcode == LSHIFTRT
6918 || subcode == ROTATE || subcode == ROTATERT
6920 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6922 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6923 /* Register shifts cost an extra cycle. */
6924 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6925 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6934 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6936 *total = COSTS_N_INSNS (4);
6940 operand = XEXP (x, 0);
6942 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6943 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6944 && GET_CODE (XEXP (operand, 0)) == REG
6945 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6946 *total += COSTS_N_INSNS (1);
6947 *total += (rtx_cost (XEXP (x, 1), code, speed)
6948 + rtx_cost (XEXP (x, 2), code, speed));
6952 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6954 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6960 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6961 && mode == SImode && XEXP (x, 1) == const0_rtx)
6963 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6969 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6970 && mode == SImode && XEXP (x, 1) == const0_rtx)
6972 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6992 /* SCC insns. If the comparison has already been performed, they
6993 cost 2 instructions. Otherwise they need
6994 an additional comparison before them. */
6995 *total = COSTS_N_INSNS (2);
6996 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7003 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7009 *total += COSTS_N_INSNS (1);
7010 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7011 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7013 *total += rtx_cost (XEXP (x, 0), code, speed);
7017 subcode = GET_CODE (XEXP (x, 0));
7018 if (subcode == ASHIFT || subcode == ASHIFTRT
7019 || subcode == LSHIFTRT
7020 || subcode == ROTATE || subcode == ROTATERT)
7022 *total += rtx_cost (XEXP (x, 1), code, speed);
7023 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7028 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7030 *total += rtx_cost (XEXP (x, 1), code, speed);
7031 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7041 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7042 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7043 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7044 *total += rtx_cost (XEXP (x, 1), code, speed);
7048 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7050 if (TARGET_HARD_FLOAT
7052 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7054 *total = COSTS_N_INSNS (1);
7057 *total = COSTS_N_INSNS (20);
7060 *total = COSTS_N_INSNS (1);
7062 *total += COSTS_N_INSNS (3);
7068 if (GET_MODE_CLASS (mode) == MODE_INT)
7070 rtx op = XEXP (x, 0);
7071 enum machine_mode opmode = GET_MODE (op);
7074 *total += COSTS_N_INSNS (1);
7076 if (opmode != SImode)
7080 /* If !arm_arch4, we use one of the extendhisi2_mem
7081 or movhi_bytes patterns for HImode. For a QImode
7082 sign extension, we first zero-extend from memory
7083 and then perform a shift sequence. */
7084 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7085 *total += COSTS_N_INSNS (2);
7088 *total += COSTS_N_INSNS (1);
7090 /* We don't have the necessary insn, so we need to perform some
7091 other operation. */
7092 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7093 /* An and with constant 255. */
7094 *total += COSTS_N_INSNS (1);
7096 /* A shift sequence. Increase costs slightly to avoid
7097 combining two shifts into an extend operation. */
7098 *total += COSTS_N_INSNS (2) + 1;
7104 switch (GET_MODE (XEXP (x, 0)))
7111 *total = COSTS_N_INSNS (1);
7121 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7125 if (const_ok_for_arm (INTVAL (x))
7126 || const_ok_for_arm (~INTVAL (x)))
7127 *total = COSTS_N_INSNS (1);
7129 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7130 INTVAL (x), NULL_RTX,
7137 *total = COSTS_N_INSNS (3);
7141 *total = COSTS_N_INSNS (1);
7145 *total = COSTS_N_INSNS (1);
7146 *total += rtx_cost (XEXP (x, 0), code, speed);
7150 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7151 && (mode == SFmode || !TARGET_VFP_SINGLE))
7152 *total = COSTS_N_INSNS (1);
7154 *total = COSTS_N_INSNS (4);
7158 *total = COSTS_N_INSNS (4);
7163 /* Estimates the size cost of thumb1 instructions.
7164 For now most of the code is copied from thumb1_rtx_costs. We need more
7165 fine-grained tuning when we have more related test cases. */
7167 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7169 enum machine_mode mode = GET_MODE (x);
7182 return COSTS_N_INSNS (1);
7185 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7187 /* The Thumb-1 mul instruction can't operate on a constant; we must
7188 load it into a register first. */
7189 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7190 return COSTS_N_INSNS (1) + const_size;
7192 return COSTS_N_INSNS (1);
7195 return (COSTS_N_INSNS (1)
7196 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7197 + (GET_CODE (SET_DEST (x)) == MEM)));
7202 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7203 return COSTS_N_INSNS (1);
7204 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
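      /* Illustrative example, not in the original comments: -200 satisfies
	 constraint J, so it is built in two instructions, e.g. a move of
	 #200 followed by a negate. */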
7205 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7206 return COSTS_N_INSNS (2);
7207 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7208 if (thumb_shiftable_const (INTVAL (x)))
7209 return COSTS_N_INSNS (2);
7210 return COSTS_N_INSNS (3);
7212 else if ((outer == PLUS || outer == COMPARE)
7213 && INTVAL (x) < 256 && INTVAL (x) > -256)
7215 else if ((outer == IOR || outer == XOR || outer == AND)
7216 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7217 return COSTS_N_INSNS (1);
7218 else if (outer == AND)
7221 /* This duplicates the tests in the andsi3 expander. */
7222 for (i = 9; i <= 31; i++)
7223 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7224 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7225 return COSTS_N_INSNS (2);
7227 else if (outer == ASHIFT || outer == ASHIFTRT
7228 || outer == LSHIFTRT)
7230 return COSTS_N_INSNS (2);
7236 return COSTS_N_INSNS (3);
7254 /* XXX another guess. */
7255 /* Memory costs quite a lot for the first word, but subsequent words
7256 load at the equivalent of a single insn each. */
7257 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7258 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7263 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7268 /* XXX still guessing. */
7269 switch (GET_MODE (XEXP (x, 0)))
7272 return (1 + (mode == DImode ? 4 : 0)
7273 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7276 return (4 + (mode == DImode ? 4 : 0)
7277 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7280 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7291 /* RTX costs when optimizing for size. */
7293 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7296 enum machine_mode mode = GET_MODE (x);
7299 *total = thumb1_size_rtx_costs (x, code, outer_code);
7303 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7307 /* A memory access costs 1 insn if the mode is small, or the address is
7308 a single register, otherwise it costs one insn per word. */
7309 if (REG_P (XEXP (x, 0)))
7310 *total = COSTS_N_INSNS (1);
7312 && GET_CODE (XEXP (x, 0)) == PLUS
7313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7314 /* This will be split into two instructions.
7315 See arm.md:calculate_pic_address. */
7316 *total = COSTS_N_INSNS (2);
7318 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7325 /* Needs a libcall, so it costs about this. */
7326 *total = COSTS_N_INSNS (2);
7330 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7332 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7340 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7342 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7345 else if (mode == SImode)
7347 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7348 /* Slightly disparage register shifts, but not by much. */
7349 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7350 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7354 /* Needs a libcall. */
7355 *total = COSTS_N_INSNS (2);
7359 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7360 && (mode == SFmode || !TARGET_VFP_SINGLE))
7362 *total = COSTS_N_INSNS (1);
7368 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7369 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7371 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7372 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7373 || subcode1 == ROTATE || subcode1 == ROTATERT
7374 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7375 || subcode1 == ASHIFTRT)
7377 /* It's just the cost of the two operands. */
7382 *total = COSTS_N_INSNS (1);
7386 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7391 && (mode == SFmode || !TARGET_VFP_SINGLE))
7393 *total = COSTS_N_INSNS (1);
7397 /* A shift as a part of ADD costs nothing. */
7398 if (GET_CODE (XEXP (x, 0)) == MULT
7399 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7401 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7402 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7403 *total += rtx_cost (XEXP (x, 1), code, false);
7408 case AND: case XOR: case IOR:
7411 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7413 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7414 || subcode == LSHIFTRT || subcode == ASHIFTRT
7415 || (code == AND && subcode == NOT))
7417 /* It's just the cost of the two operands. */
7423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7427 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7431 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7432 && (mode == SFmode || !TARGET_VFP_SINGLE))
7434 *total = COSTS_N_INSNS (1);
7440 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7449 if (cc_register (XEXP (x, 0), VOIDmode))
7452 *total = COSTS_N_INSNS (1);
7456 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7457 && (mode == SFmode || !TARGET_VFP_SINGLE))
7458 *total = COSTS_N_INSNS (1);
7460 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7465 return arm_rtx_costs_1 (x, outer_code, total, 0);
7468 if (const_ok_for_arm (INTVAL (x)))
7469 /* A multiplication by a constant requires another instruction
7470 to load the constant to a register. */
7471 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7473 else if (const_ok_for_arm (~INTVAL (x)))
7474 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7475 else if (const_ok_for_arm (-INTVAL (x)))
7477 if (outer_code == COMPARE || outer_code == PLUS
7478 || outer_code == MINUS)
7481 *total = COSTS_N_INSNS (1);
7484 *total = COSTS_N_INSNS (2);
7490 *total = COSTS_N_INSNS (2);
7494 *total = COSTS_N_INSNS (4);
7499 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7500 cost of these slightly. */
7501 *total = COSTS_N_INSNS (1) + 1;
7505 if (mode != VOIDmode)
7506 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7508 *total = COSTS_N_INSNS (4); /* Who knows? */
7513 /* RTX costs. Dispatch to the size costs when optimizing for size, otherwise to the per-core cost hook. */
7515 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7519 return arm_size_rtx_costs (x, (enum rtx_code) code,
7520 (enum rtx_code) outer_code, total);
7522 return current_tune->rtx_costs (x, (enum rtx_code) code,
7523 (enum rtx_code) outer_code,
7527 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7528 supported on any "slowmul" cores, so it can be ignored. */
7531 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7532 int *total, bool speed)
7534 enum machine_mode mode = GET_MODE (x);
7538 *total = thumb1_rtx_costs (x, code, outer_code);
7545 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7548 *total = COSTS_N_INSNS (20);
7552 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7554 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7555 & (unsigned HOST_WIDE_INT) 0xffffffff);
7556 int cost, const_ok = const_ok_for_arm (i);
7557 int j, booth_unit_size;
7559 /* Tune as appropriate. */
7560 cost = const_ok ? 4 : 8;
7561 booth_unit_size = 2;
7562 for (j = 0; i && j < 32; j += booth_unit_size)
7564 i >>= booth_unit_size;
7568 *total = COSTS_N_INSNS (cost);
7569 *total += rtx_cost (XEXP (x, 0), code, speed);
7573 *total = COSTS_N_INSNS (20);
7577 return arm_rtx_costs_1 (x, outer_code, total, speed);
7582 /* RTX cost for cores with a fast multiply unit (M variants). */
7585 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7586 int *total, bool speed)
7588 enum machine_mode mode = GET_MODE (x);
7592 *total = thumb1_rtx_costs (x, code, outer_code);
7596 /* ??? Should Thumb-2 use different costs? */
7600 /* There is no point basing this on the tuning, since it is always the
7601 fast variant if it exists at all. */
7603 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7604 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7605 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7607 *total = COSTS_N_INSNS (2);
7614 *total = COSTS_N_INSNS (5);
7618 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7620 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7621 & (unsigned HOST_WIDE_INT) 0xffffffff);
7622 int cost, const_ok = const_ok_for_arm (i);
7623 int j, booth_unit_size;
7625 /* Tune as appropriate. */
7626 cost = const_ok ? 4 : 8;
7627 booth_unit_size = 8;
7628 for (j = 0; i && j < 32; j += booth_unit_size)
7630 i >>= booth_unit_size;
7634 *total = COSTS_N_INSNS (cost);
7640 *total = COSTS_N_INSNS (4);
7644 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7646 if (TARGET_HARD_FLOAT
7648 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7650 *total = COSTS_N_INSNS (1);
7655 /* Requires a lib call. */
7656 *total = COSTS_N_INSNS (20);
7660 return arm_rtx_costs_1 (x, outer_code, total, speed);
7665 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7666 so it can be ignored. */
7669 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7670 int *total, bool speed)
7672 enum machine_mode mode = GET_MODE (x);
7676 *total = thumb1_rtx_costs (x, code, outer_code);
7683 if (GET_CODE (XEXP (x, 0)) != MULT)
7684 return arm_rtx_costs_1 (x, outer_code, total, speed);
7686 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7687 will stall until the multiplication is complete. */
7688 *total = COSTS_N_INSNS (3);
7692 /* There is no point basing this on the tuning, since it is always the
7693 fast variant if it exists at all. */
7695 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7696 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7697 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7699 *total = COSTS_N_INSNS (2);
7706 *total = COSTS_N_INSNS (5);
7710 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7712 /* If operand 1 is a constant we can more accurately
7713 calculate the cost of the multiply. The multiplier can
7714 retire 15 bits on the first cycle and a further 12 on the
7715 second. We do, of course, have to load the constant into
7716 a register first. */
7717 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7718 /* There's a general overhead of one cycle. */
7720 unsigned HOST_WIDE_INT masked_const;
7725 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7727 masked_const = i & 0xffff8000;
7728 if (masked_const != 0)
7731 masked_const = i & 0xf8000000;
7732 if (masked_const != 0)
7735 *total = COSTS_N_INSNS (cost);
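	  /* Illustrative example, not from the original sources: a small
	     constant such as 0x1234 leaves both masks (0xffff8000 and
	     0xf8000000) zero, so only the general one-cycle overhead
	     applies; a constant with bits at or above bit 15 pays for at
	     least one extra cycle. */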
7741 *total = COSTS_N_INSNS (3);
7745 /* Requires a lib call. */
7746 *total = COSTS_N_INSNS (20);
7750 return arm_rtx_costs_1 (x, outer_code, total, speed);
7755 /* RTX costs for 9e (and later) cores. */
7758 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7759 int *total, bool speed)
7761 enum machine_mode mode = GET_MODE (x);
7768 *total = COSTS_N_INSNS (3);
7772 *total = thumb1_rtx_costs (x, code, outer_code);
7780 /* There is no point basing this on the tuning, since it is always the
7781 fast variant if it exists at all. */
7783 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7784 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7785 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7787 *total = COSTS_N_INSNS (2);
7794 *total = COSTS_N_INSNS (5);
7800 *total = COSTS_N_INSNS (2);
7804 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7806 if (TARGET_HARD_FLOAT
7808 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7810 *total = COSTS_N_INSNS (1);
7815 *total = COSTS_N_INSNS (20);
7819 return arm_rtx_costs_1 (x, outer_code, total, speed);
7822 /* All address computations that can be done are free, but rtx cost returns
7823 the same for practically all of them. So we weight the different types
7824 of address here in the order (most preferred first):
7825 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7827 arm_arm_address_cost (rtx x)
7829 enum rtx_code c = GET_CODE (x);
7831 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7833 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7838 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7841 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7851 arm_thumb_address_cost (rtx x)
7853 enum rtx_code c = GET_CODE (x);
7858 && GET_CODE (XEXP (x, 0)) == REG
7859 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7866 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7868 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7871 /* Adjust cost hook for XScale. */
7873 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7875 /* Some true dependencies can have a higher cost depending
7876 on precisely how certain input operands are used. */
7877 if (REG_NOTE_KIND(link) == 0
7878 && recog_memoized (insn) >= 0
7879 && recog_memoized (dep) >= 0)
7881 int shift_opnum = get_attr_shift (insn);
7882 enum attr_type attr_type = get_attr_type (dep);
7884 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7885 operand for INSN. If we have a shifted input operand and the
7886 instruction we depend on is another ALU instruction, then we may
7887 have to account for an additional stall. */
7888 if (shift_opnum != 0
7889 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7891 rtx shifted_operand;
7894 /* Get the shifted operand. */
7895 extract_insn (insn);
7896 shifted_operand = recog_data.operand[shift_opnum];
7898 /* Iterate over all the operands in DEP. If we write an operand
7899 that overlaps with SHIFTED_OPERAND, then we have to increase the
7900 cost of this dependency. */
7902 preprocess_constraints ();
7903 for (opno = 0; opno < recog_data.n_operands; opno++)
7905 /* We can ignore strict inputs. */
7906 if (recog_data.operand_type[opno] == OP_IN)
7909 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7921 /* Adjust cost hook for Cortex A9. */
7923 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7925 switch (REG_NOTE_KIND (link))
7932 case REG_DEP_OUTPUT:
7933 if (recog_memoized (insn) >= 0
7934 && recog_memoized (dep) >= 0)
7936 if (GET_CODE (PATTERN (insn)) == SET)
7939 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7941 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7943 enum attr_type attr_type_insn = get_attr_type (insn);
7944 enum attr_type attr_type_dep = get_attr_type (dep);
7946 /* By default all dependencies of the form
7949 have an extra latency of 1 cycle because
7950 of the input and output dependency in this
7951 case. However this gets modeled as a true
7952 dependency and hence all these checks. */
7953 if (REG_P (SET_DEST (PATTERN (insn)))
7954 && REG_P (SET_DEST (PATTERN (dep)))
7955 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7956 SET_DEST (PATTERN (dep))))
7958 /* FMACS is a special case where the dependent
7959 instruction can be issued 3 cycles before
7960 the normal latency in case of an output
7961 dependency. */
7962 if ((attr_type_insn == TYPE_FMACS
7963 || attr_type_insn == TYPE_FMACD)
7964 && (attr_type_dep == TYPE_FMACS
7965 || attr_type_dep == TYPE_FMACD))
7967 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7968 *cost = insn_default_latency (dep) - 3;
7970 *cost = insn_default_latency (dep);
7975 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7976 *cost = insn_default_latency (dep) + 1;
7978 *cost = insn_default_latency (dep);
7994 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7995 It corrects the value of COST based on the relationship between
7996 INSN and DEP through the dependence LINK. It returns the new
7997 value. There is a per-core adjust_cost hook to adjust scheduler costs
7998 and the per-core hook can choose to completely override the generic
7999 adjust_cost function. Only put bits of code into arm_adjust_cost that
8000 are common across all cores. */
8002 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8006 /* When generating Thumb-1 code, we want to place flag-setting operations
8007 close to a conditional branch which depends on them, so that we can
8008 omit the comparison. */
8010 && REG_NOTE_KIND (link) == 0
8011 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8012 && recog_memoized (dep) >= 0
8013 && get_attr_conds (dep) == CONDS_SET)
8016 if (current_tune->sched_adjust_cost != NULL)
8018 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8022 /* XXX This is not strictly true for the FPA. */
8023 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8024 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8027 /* Call insns don't incur a stall, even if they follow a load. */
8028 if (REG_NOTE_KIND (link) == 0
8029 && GET_CODE (insn) == CALL_INSN)
8032 if ((i_pat = single_set (insn)) != NULL
8033 && GET_CODE (SET_SRC (i_pat)) == MEM
8034 && (d_pat = single_set (dep)) != NULL
8035 && GET_CODE (SET_DEST (d_pat)) == MEM)
8037 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8038 /* This is a load after a store, there is no conflict if the load reads
8039 from a cached area. Assume that loads from the stack, and from the
8040 constant pool are cached, and that others will miss. This is a
8043 if ((GET_CODE (src_mem) == SYMBOL_REF
8044 && CONSTANT_POOL_ADDRESS_P (src_mem))
8045 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8046 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8047 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8054 static int fp_consts_inited = 0;
8056 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8057 static const char * const strings_fp[8] =
8060 "4", "5", "0.5", "10"
8063 static REAL_VALUE_TYPE values_fp[8];
8066 init_fp_table (void)
8072 fp_consts_inited = 1;
8074 fp_consts_inited = 8;
8076 for (i = 0; i < fp_consts_inited; i++)
8078 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8083 /* Return TRUE if rtx X is a valid immediate FP constant. */
8085 arm_const_double_rtx (rtx x)
8090 if (!fp_consts_inited)
8093 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8094 if (REAL_VALUE_MINUS_ZERO (r))
8097 for (i = 0; i < fp_consts_inited; i++)
8098 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8104 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8106 neg_const_double_rtx_ok_for_fpa (rtx x)
8111 if (!fp_consts_inited)
8114 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8115 r = real_value_negate (&r);
8116 if (REAL_VALUE_MINUS_ZERO (r))
8119 for (i = 0; i < 8; i++)
8120 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8127 /* VFPv3 has a fairly wide range of representable immediates, formed from
8128 "quarter-precision" floating-point values. These can be evaluated using this
8129 formula (with ^ for exponentiation):
8131 -1^s * n * 2^-r
8133 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8134 16 <= n <= 31 and 0 <= r <= 7.
8136 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8138 - A (most-significant) is the sign bit.
8139 - BCD are the exponent (encoded as r XOR 3).
8140 - EFGH are the mantissa (encoded as n - 16).
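/* Worked example, not part of the original comment: 1.0 is n * 2^-r with
   s = 0, n = 16, r = 4, so it encodes as A = 0, BCD = 4 XOR 3 = 7,
   EFGH = 16 - 16 = 0, i.e. the 8-bit immediate 0x70. */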
8143 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8144 fconst[sd] instruction, or -1 if X isn't suitable. */
8146 vfp3_const_double_index (rtx x)
8148 REAL_VALUE_TYPE r, m;
8150 unsigned HOST_WIDE_INT mantissa, mant_hi;
8151 unsigned HOST_WIDE_INT mask;
8152 HOST_WIDE_INT m1, m2;
8153 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8155 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8158 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8160 /* We can't represent these things, so detect them first. */
8161 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8164 /* Extract sign, exponent and mantissa. */
8165 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8166 r = real_value_abs (&r);
8167 exponent = REAL_EXP (&r);
8168 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8169 highest (sign) bit, with a fixed binary point at bit point_pos.
8170 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8171 bits for the mantissa, this may fail (low bits would be lost). */
8172 real_ldexp (&m, &r, point_pos - exponent);
8173 REAL_VALUE_TO_INT (&m1, &m2, m);
8177 /* If there are bits set in the low part of the mantissa, we can't
8178 represent this value. */
8182 /* Now make it so that mantissa contains the most-significant bits, and move
8183 the point_pos to indicate that the least-significant bits have been
8184 discarded. */
8185 point_pos -= HOST_BITS_PER_WIDE_INT;
8188 /* We can permit four significant bits of mantissa only, plus a high bit
8189 which is always 1. */
8190 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8191 if ((mantissa & mask) != 0)
8194 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8195 mantissa >>= point_pos - 5;
8197 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8198 floating-point immediate zero with Neon using an integer-zero load, but
8199 that case is handled elsewhere.) */
8203 gcc_assert (mantissa >= 16 && mantissa <= 31);
8205 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8206 normalized significands are in the range [1, 2). (Our mantissa is shifted
8207 left 4 places at this point relative to normalized IEEE754 values). GCC
8208 internally uses [0.5, 1) (see real.c), so the exponent returned from
8209 REAL_EXP must be altered. */
8210 exponent = 5 - exponent;
8212 if (exponent < 0 || exponent > 7)
8215 /* Sign, mantissa and exponent are now in the correct form to plug into the
8216 formula described in the comment above. */
8217 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8220 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8222 vfp3_const_double_rtx (rtx x)
8227 return vfp3_const_double_index (x) != -1;
8230 /* Recognize immediates which can be used in various Neon instructions. Legal
8231 immediates are described by the following table (for VMVN variants, the
8232 bitwise inverse of the constant shown is recognized. In either case, VMOV
8233 is output and the correct instruction to use for a given constant is chosen
8234 by the assembler). The constant shown is replicated across all elements of
8235 the destination vector.
8237 insn elems variant constant (binary)
8238 ---- ----- ------- -----------------
8239 vmov i32 0 00000000 00000000 00000000 abcdefgh
8240 vmov i32 1 00000000 00000000 abcdefgh 00000000
8241 vmov i32 2 00000000 abcdefgh 00000000 00000000
8242 vmov i32 3 abcdefgh 00000000 00000000 00000000
8243 vmov i16 4 00000000 abcdefgh
8244 vmov i16 5 abcdefgh 00000000
8245 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8246 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8247 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8248 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8249 vmvn i16 10 00000000 abcdefgh
8250 vmvn i16 11 abcdefgh 00000000
8251 vmov i32 12 00000000 00000000 abcdefgh 11111111
8252 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8253 vmov i32 14 00000000 abcdefgh 11111111 11111111
8254 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8256 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8257 eeeeeeee ffffffff gggggggg hhhhhhhh
8258 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8260 For case 18, B = !b. Representable values are exactly those accepted by
8261 vfp3_const_double_index, but are output as floating-point numbers rather
8262 than integers.
8264 Variants 0-5 (inclusive) may also be used as immediates for the second
8265 operand of VORR/VBIC instructions.
8267 The INVERSE argument causes the bitwise inverse of the given operand to be
8268 recognized instead (used for recognizing legal immediates for the VAND/VORN
8269 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8270 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8271 output, rather than the real insns vbic/vorr).
8273 INVERSE makes no difference to the recognition of float vectors.
8275 The return value is the variant of immediate as shown in the above table, or
8276 -1 if the given value doesn't match any of the listed patterns.
8279 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8280 rtx *modconst, int *elementwidth)
8282 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8284 for (i = 0; i < idx; i += (STRIDE)) \
8289 immtype = (CLASS); \
8290 elsize = (ELSIZE); \
8294 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8295 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8296 unsigned char bytes[16];
8297 int immtype = -1, matches;
8298 unsigned int invmask = inverse ? 0xff : 0;
8300 /* Vectors of float constants. */
8301 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8303 rtx el0 = CONST_VECTOR_ELT (op, 0);
8306 if (!vfp3_const_double_rtx (el0))
8309 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8311 for (i = 1; i < n_elts; i++)
8313 rtx elt = CONST_VECTOR_ELT (op, i);
8316 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8318 if (!REAL_VALUES_EQUAL (r0, re))
8323 *modconst = CONST_VECTOR_ELT (op, 0);
8331 /* Splat vector constant out into a byte vector. */
8332 for (i = 0; i < n_elts; i++)
8334 rtx el = CONST_VECTOR_ELT (op, i);
8335 unsigned HOST_WIDE_INT elpart;
8336 unsigned int part, parts;
8338 if (GET_CODE (el) == CONST_INT)
8340 elpart = INTVAL (el);
8343 else if (GET_CODE (el) == CONST_DOUBLE)
8345 elpart = CONST_DOUBLE_LOW (el);
8351 for (part = 0; part < parts; part++)
8354 for (byte = 0; byte < innersize; byte++)
8356 bytes[idx++] = (elpart & 0xff) ^ invmask;
8357 elpart >>= BITS_PER_UNIT;
8359 if (GET_CODE (el) == CONST_DOUBLE)
8360 elpart = CONST_DOUBLE_HIGH (el);
8365 gcc_assert (idx == GET_MODE_SIZE (mode));
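  /* Illustrative example, not from the original sources: a V2SImode
     CONST_VECTOR with both elements 0x000000ab splats to the byte vector
     {0xab, 0, 0, 0, 0xab, 0, 0, 0}, which the first check below
     recognizes as variant 0 (vmov.i32) with an element size of 32. */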
8369 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8370 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8372 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8373 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8375 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8376 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8378 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8379 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8381 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8383 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8385 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8386 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8388 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8389 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8391 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8392 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8394 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8395 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8397 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8399 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8401 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8402 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8404 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8405 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8407 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8408 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8410 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8411 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8413 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8415 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8416 && bytes[i] == bytes[(i + 8) % idx]);
8424 *elementwidth = elsize;
8428 unsigned HOST_WIDE_INT imm = 0;
8430 /* Un-invert bytes of recognized vector, if necessary. */
8432 for (i = 0; i < idx; i++)
8433 bytes[i] ^= invmask;
8437 /* FIXME: Broken on 32-bit H_W_I hosts. */
8438 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8440 for (i = 0; i < 8; i++)
8441 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8442 << (i * BITS_PER_UNIT);
8444 *modconst = GEN_INT (imm);
8448 unsigned HOST_WIDE_INT imm = 0;
8450 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8451 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8453 *modconst = GEN_INT (imm);
8461 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8462 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8463 float elements), and a modified constant (whatever should be output for a
8464 VMOV) in *MODCONST. */
8467 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8468 rtx *modconst, int *elementwidth)
8472 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8478 *modconst = tmpconst;
8481 *elementwidth = tmpwidth;
8486 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8487 the immediate is valid, write a constant suitable for using as an operand
8488 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8489 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8492 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8493 rtx *modconst, int *elementwidth)
8497 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8499 if (retval < 0 || retval > 5)
8503 *modconst = tmpconst;
8506 *elementwidth = tmpwidth;
8511 /* Return a string suitable for output of Neon immediate logic operation
8512 MNEM. */
8515 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8516 int inverse, int quad)
8518 int width, is_valid;
8519 static char templ[40];
8521 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8523 gcc_assert (is_valid != 0);
8526 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8528 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8533 /* Output a sequence of pairwise operations to implement a reduction.
8534 NOTE: We do "too much work" here, because pairwise operations work on two
8535 registers-worth of operands in one go. Unfortunately I don't think we
8536 can exploit those extra calculations to do the full operation in fewer steps.
8537 Although all vector elements of the result but the first are ignored, we
8538 actually calculate the same result in each of the elements. An alternative
8539 such as initially loading a vector with zero to use as each of the second
8540 operands would use up an additional register and take an extra instruction,
8541 for no particular gain. */
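/* Illustrative example, not part of the original comment: reducing a
   V4SF vector emits two pairwise steps (parts = 4, so i takes the values
   2 and 1), each halving the number of distinct partial sums; in general
   log2 (parts) applications of REDUC are emitted. */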
8544 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8545 rtx (*reduc) (rtx, rtx, rtx))
8547 enum machine_mode inner = GET_MODE_INNER (mode);
8548 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8551 for (i = parts / 2; i >= 1; i /= 2)
8553 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8554 emit_insn (reduc (dest, tmpsum, tmpsum));
8559 /* If VALS is a vector constant that can be loaded into a register
8560 using VDUP, generate instructions to do so and return an RTX to
8561 assign to the register. Otherwise return NULL_RTX. */
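/* Illustrative example, not from the original sources: a V4SImode vector
   with every element equal to 0x12345678 -- not a valid VMOV immediate --
   is handled here by moving 0x12345678 into a core register and returning
   (vec_duplicate:V4SI (reg)), i.e. a single VDUP.32. */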
8564 neon_vdup_constant (rtx vals)
8566 enum machine_mode mode = GET_MODE (vals);
8567 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8568 int n_elts = GET_MODE_NUNITS (mode);
8569 bool all_same = true;
8573 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8576 for (i = 0; i < n_elts; ++i)
8578 x = XVECEXP (vals, 0, i);
8579 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8584 /* The elements are not all the same. We could handle repeating
8585 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8586 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8587 vmov.i16). */
8590 /* We can load this constant by using VDUP and a constant in a
8591 single ARM register. This will be cheaper than a vector
8592 load. */
8594 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8595 return gen_rtx_VEC_DUPLICATE (mode, x);
8598 /* Generate code to load VALS, which is a PARALLEL containing only
8599 constants (for vec_init) or CONST_VECTOR, efficiently into a
8600 register. Returns an RTX to copy into the register, or NULL_RTX
8601 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8604 neon_make_constant (rtx vals)
8606 enum machine_mode mode = GET_MODE (vals);
8608 rtx const_vec = NULL_RTX;
8609 int n_elts = GET_MODE_NUNITS (mode);
8613 if (GET_CODE (vals) == CONST_VECTOR)
8615 else if (GET_CODE (vals) == PARALLEL)
8617 /* A CONST_VECTOR must contain only CONST_INTs and
8618 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8619 Only store valid constants in a CONST_VECTOR. */
8620 for (i = 0; i < n_elts; ++i)
8622 rtx x = XVECEXP (vals, 0, i);
8623 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8626 if (n_const == n_elts)
8627 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8632 if (const_vec != NULL
8633 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8634 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8636 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8637 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8638 pipeline cycle; creating the constant takes one or two ARM
8639 pipeline slots. */
8641 else if (const_vec != NULL_RTX)
8642 /* Load from constant pool. On Cortex-A8 this takes two cycles
8643 (for either double or quad vectors). We can not take advantage
8644 of single-cycle VLD1 because we need a PC-relative addressing
8645 mode. */
8648 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8649 We can not construct an initializer. */
8653 /* Initialize vector TARGET to VALS. */
8656 neon_expand_vector_init (rtx target, rtx vals)
8658 enum machine_mode mode = GET_MODE (target);
8659 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8660 int n_elts = GET_MODE_NUNITS (mode);
8661 int n_var = 0, one_var = -1;
8662 bool all_same = true;
8666 for (i = 0; i < n_elts; ++i)
8668 x = XVECEXP (vals, 0, i);
8669 if (!CONSTANT_P (x))
8670 ++n_var, one_var = i;
8672 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8678 rtx constant = neon_make_constant (vals);
8679 if (constant != NULL_RTX)
8681 emit_move_insn (target, constant);
8686 /* Splat a single non-constant element if we can. */
8687 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8689 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8690 emit_insn (gen_rtx_SET (VOIDmode, target,
8691 gen_rtx_VEC_DUPLICATE (mode, x)));
8695 /* One field is non-constant. Load constant then overwrite varying
8696 field. This is more efficient than using the stack. */
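      /* Illustrative example, not part of the original comment: for
	 vals = {x, 1, 2, 3} with non-constant x, the copy {1, 1, 2, 3} is
	 loaded as a constant vector and x is then inserted into lane 0 by
	 one of the vset_lane patterns below. */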
8699 rtx copy = copy_rtx (vals);
8700 rtx index = GEN_INT (one_var);
8702 /* Load constant part of vector, substitute neighboring value for
8703 varying element. */
8704 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8705 neon_expand_vector_init (target, copy);
8707 /* Insert variable. */
8708 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8712 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8715 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8718 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8721 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8724 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8727 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8730 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8733 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8736 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8744 /* Construct the vector in memory one field at a time
8745 and load the whole vector. */
8746 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8747 for (i = 0; i < n_elts; i++)
8748 emit_move_insn (adjust_address_nv (mem, inner_mode,
8749 i * GET_MODE_SIZE (inner_mode)),
8750 XVECEXP (vals, 0, i));
8751 emit_move_insn (target, mem);
8754 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8755 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8756 reported source locations are bogus. */
8759 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8764 gcc_assert (GET_CODE (operand) == CONST_INT);
8766 lane = INTVAL (operand);
8768 if (lane < low || lane >= high)
8772 /* Bounds-check lanes. */
8775 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8777 bounds_check (operand, low, high, "lane out of range");
8780 /* Bounds-check constants. */
8783 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8785 bounds_check (operand, low, high, "constant out of range");
8789 neon_element_bits (enum machine_mode mode)
8792 return GET_MODE_BITSIZE (mode);
8794 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8798 /* Predicates for `match_operand' and `match_operator'. */
8800 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8802 cirrus_memory_offset (rtx op)
8804 /* Reject eliminable registers. */
8805 if (! (reload_in_progress || reload_completed)
8806 && ( reg_mentioned_p (frame_pointer_rtx, op)
8807 || reg_mentioned_p (arg_pointer_rtx, op)
8808 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8809 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8810 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8811 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8814 if (GET_CODE (op) == MEM)
8820 /* Match: (mem (reg)). */
8821 if (GET_CODE (ind) == REG)
8827 if (GET_CODE (ind) == PLUS
8828 && GET_CODE (XEXP (ind, 0)) == REG
8829 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8830 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8837 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8838 WB is true if full writeback address modes are allowed and is false
8839 if limited writeback address modes (POST_INC and PRE_DEC) are
8840 allowed. */
8843 arm_coproc_mem_operand (rtx op, bool wb)
8847 /* Reject eliminable registers. */
8848 if (! (reload_in_progress || reload_completed)
8849 && ( reg_mentioned_p (frame_pointer_rtx, op)
8850 || reg_mentioned_p (arg_pointer_rtx, op)
8851 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8852 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8853 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8854 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8857 /* Constants are converted into offsets from labels. */
8858 if (GET_CODE (op) != MEM)
8863 if (reload_completed
8864 && (GET_CODE (ind) == LABEL_REF
8865 || (GET_CODE (ind) == CONST
8866 && GET_CODE (XEXP (ind, 0)) == PLUS
8867 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8868 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8871 /* Match: (mem (reg)). */
8872 if (GET_CODE (ind) == REG)
8873 return arm_address_register_rtx_p (ind, 0);
8875 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8876 acceptable in any case (subject to verification by
8877 arm_address_register_rtx_p). We need WB to be true to accept
8878 PRE_INC and POST_DEC. */
8879 if (GET_CODE (ind) == POST_INC
8880 || GET_CODE (ind) == PRE_DEC
8882 && (GET_CODE (ind) == PRE_INC
8883 || GET_CODE (ind) == POST_DEC)))
8884 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8887 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8888 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8889 && GET_CODE (XEXP (ind, 1)) == PLUS
8890 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8891 ind = XEXP (ind, 1);
8896 if (GET_CODE (ind) == PLUS
8897 && GET_CODE (XEXP (ind, 0)) == REG
8898 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8899 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8900 && INTVAL (XEXP (ind, 1)) > -1024
8901 && INTVAL (XEXP (ind, 1)) < 1024
8902 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
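/* Illustrative sketch (not GCC code) of the immediate test just above:
   the offset must be word-aligned and strictly inside (-1024, 1024) to
   fit the coprocessor load/store offset encoding.  */
#if 0
static int
coproc_offset_ok (long offset)
{
  return offset > -1024 && offset < 1024 && (offset & 3) == 0;
}
#endif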
8908 /* Return TRUE if OP is a memory operand which we can load or store a vector
8909 to/from. TYPE is one of the following values:
8910 0 - Vector load/store (vldr)
8911 1 - Core registers (ldm)
8912 2 - Element/structure loads (vld1)
8915 neon_vector_mem_operand (rtx op, int type)
8919 /* Reject eliminable registers. */
8920 if (! (reload_in_progress || reload_completed)
8921 && ( reg_mentioned_p (frame_pointer_rtx, op)
8922 || reg_mentioned_p (arg_pointer_rtx, op)
8923 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8924 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8925 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8926 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8929 /* Constants are converted into offsets from labels. */
8930 if (GET_CODE (op) != MEM)
8935 if (reload_completed
8936 && (GET_CODE (ind) == LABEL_REF
8937 || (GET_CODE (ind) == CONST
8938 && GET_CODE (XEXP (ind, 0)) == PLUS
8939 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8940 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8943 /* Match: (mem (reg)). */
8944 if (GET_CODE (ind) == REG)
8945 return arm_address_register_rtx_p (ind, 0);
8947 /* Allow post-increment with Neon registers. */
8948 if ((type != 1 && GET_CODE (ind) == POST_INC)
8949 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8950 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8952 /* FIXME: vld1 allows register post-modify. */
8958 && GET_CODE (ind) == PLUS
8959 && GET_CODE (XEXP (ind, 0)) == REG
8960 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8961 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8962 && INTVAL (XEXP (ind, 1)) > -1024
8963 && INTVAL (XEXP (ind, 1)) < 1016
8964 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8970 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
8973 neon_struct_mem_operand (rtx op)
8977 /* Reject eliminable registers. */
8978 if (! (reload_in_progress || reload_completed)
8979 && ( reg_mentioned_p (frame_pointer_rtx, op)
8980 || reg_mentioned_p (arg_pointer_rtx, op)
8981 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8982 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8983 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8984 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8987 /* Constants are converted into offsets from labels. */
8988 if (GET_CODE (op) != MEM)
8993 if (reload_completed
8994 && (GET_CODE (ind) == LABEL_REF
8995 || (GET_CODE (ind) == CONST
8996 && GET_CODE (XEXP (ind, 0)) == PLUS
8997 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8998 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9001 /* Match: (mem (reg)). */
9002 if (GET_CODE (ind) == REG)
9003 return arm_address_register_rtx_p (ind, 0);
9008 /* Return true if X is a register that will be eliminated later on. */
9010 arm_eliminable_register (rtx x)
9012 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9013 || REGNO (x) == ARG_POINTER_REGNUM
9014 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9015 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9018 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9019 coprocessor registers. Otherwise return NO_REGS. */
9022 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9026 if (!TARGET_NEON_FP16)
9027 return GENERAL_REGS;
9028 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9030 return GENERAL_REGS;
9034 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9035 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9036 && neon_vector_mem_operand (x, 0))
9039 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9042 return GENERAL_REGS;
9045 /* Values which must be returned in the most-significant end of the return register. */
9049 arm_return_in_msb (const_tree valtype)
9051 return (TARGET_AAPCS_BASED
9053 && (AGGREGATE_TYPE_P (valtype)
9054 || TREE_CODE (valtype) == COMPLEX_TYPE));
9057 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9058 Used by the Cirrus Maverick code which has to work around
9059 a hardware bug triggered by such instructions. */
9061 arm_memory_load_p (rtx insn)
9063 rtx body, lhs, rhs;
9065 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9068 body = PATTERN (insn);
9070 if (GET_CODE (body) != SET)
9073 lhs = XEXP (body, 0);
9074 rhs = XEXP (body, 1);
9076 lhs = REG_OR_SUBREG_RTX (lhs);
9078 /* If the destination is not a general purpose
9079 register we do not have to worry. */
9080 if (GET_CODE (lhs) != REG
9081 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9084 /* As well as loads from memory we also have to react
9085 to loads of invalid constants which will be turned
9086 into loads from the minipool. */
9087 return (GET_CODE (rhs) == MEM
9088 || GET_CODE (rhs) == SYMBOL_REF
9089 || note_invalid_constants (insn, -1, false));
9092 /* Return TRUE if INSN is a Cirrus instruction. */
9094 arm_cirrus_insn_p (rtx insn)
9096 enum attr_cirrus attr;
9098 /* get_attr cannot accept USE or CLOBBER. */
9100 || GET_CODE (insn) != INSN
9101 || GET_CODE (PATTERN (insn)) == USE
9102 || GET_CODE (PATTERN (insn)) == CLOBBER)
9105 attr = get_attr_cirrus (insn);
9107 return attr != CIRRUS_NOT;
9110 /* Cirrus reorg for invalid instruction combinations. */
9112 cirrus_reorg (rtx first)
9114 enum attr_cirrus attr;
9115 rtx body = PATTERN (first);
9119 /* Any branch must be followed by 2 non Cirrus instructions. */
9120 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9123 t = next_nonnote_insn (first);
9125 if (arm_cirrus_insn_p (t))
9128 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9132 emit_insn_after (gen_nop (), first);
9137 /* (float (blah)) is in parallel with a clobber. */
9138 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9139 body = XVECEXP (body, 0, 0);
9141 if (GET_CODE (body) == SET)
9143 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9145 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9146 be followed by a non Cirrus insn. */
9147 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9149 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9150 emit_insn_after (gen_nop (), first);
9154 else if (arm_memory_load_p (first))
9156 unsigned int arm_regno;
9158 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9159 ldr/cfmv64hr combination where the Rd field is the same
9160 in both instructions must be split with a non Cirrus insn. Example:

	 ldr r0, blah
	 nop
	 cfmvsr mvf0, r0. */
9167 /* Get Arm register number for ldr insn. */
9168 if (GET_CODE (lhs) == REG)
9169 arm_regno = REGNO (lhs);
9172 gcc_assert (GET_CODE (rhs) == REG);
9173 arm_regno = REGNO (rhs);
9177 first = next_nonnote_insn (first);
9179 if (! arm_cirrus_insn_p (first))
9182 body = PATTERN (first);
9184 /* (float (blah)) is in parallel with a clobber. */
9185 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9186 body = XVECEXP (body, 0, 0);
9188 if (GET_CODE (body) == FLOAT)
9189 body = XEXP (body, 0);
9191 if (get_attr_cirrus (first) == CIRRUS_MOVE
9192 && GET_CODE (XEXP (body, 1)) == REG
9193 && arm_regno == REGNO (XEXP (body, 1)))
9194 emit_insn_after (gen_nop (), first);
9200 /* get_attr cannot accept USE or CLOBBER. */
9202 || GET_CODE (first) != INSN
9203 || GET_CODE (PATTERN (first)) == USE
9204 || GET_CODE (PATTERN (first)) == CLOBBER)
9207 attr = get_attr_cirrus (first);
9209 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9210 must be followed by a non-coprocessor instruction. */
9211 if (attr == CIRRUS_COMPARE)
9215 t = next_nonnote_insn (first);
9217 if (arm_cirrus_insn_p (t))
9220 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9224 emit_insn_after (gen_nop (), first);
9230 /* Return TRUE if X references a SYMBOL_REF. */
9232 symbol_mentioned_p (rtx x)
9237 if (GET_CODE (x) == SYMBOL_REF)
9240 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9241 are constant offsets, not symbols. */
9242 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9245 fmt = GET_RTX_FORMAT (GET_CODE (x));
9247 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9253 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9254 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9257 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
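/* Illustrative sketch (not GCC code) of the format-driven walk above, with
   a hypothetical node type standing in for rtx: recurse into each operand
   whose format letter is 'e' (an expression).  Simplified -- the real walk
   also descends into vector ('E') slots.  */
#if 0
struct node
{
  const char *fmt;		/* one format letter per operand */
  struct node *op[4];
  int is_symbol;
};

static int
mentions_symbol (const struct node *x)
{
  int i;

  if (x->is_symbol)
    return 1;

  for (i = 0; x->fmt[i] != '\0'; i++)
    if (x->fmt[i] == 'e' && mentions_symbol (x->op[i]))
      return 1;

  return 0;
}
#endif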
9264 /* Return TRUE if X references a LABEL_REF. */
9266 label_mentioned_p (rtx x)
9271 if (GET_CODE (x) == LABEL_REF)
9274 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9275 instruction, but they are constant offsets, not symbols. */
9276 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9279 fmt = GET_RTX_FORMAT (GET_CODE (x));
9280 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9286 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9287 if (label_mentioned_p (XVECEXP (x, i, j)))
9290 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9298 tls_mentioned_p (rtx x)
9300 switch (GET_CODE (x))
9303 return tls_mentioned_p (XEXP (x, 0));
9306 if (XINT (x, 1) == UNSPEC_TLS)
9314 /* Must not copy any rtx that uses a pc-relative address. */
9317 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9319 if (GET_CODE (*x) == UNSPEC
9320 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9326 arm_cannot_copy_insn_p (rtx insn)
9328 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9334 enum rtx_code code = GET_CODE (x);
9351 /* Return 1 if memory locations are adjacent. */
9353 adjacent_mem_locations (rtx a, rtx b)
9355 /* We don't guarantee to preserve the order of these memory refs. */
9356 if (volatile_refs_p (a) || volatile_refs_p (b))
9359 if ((GET_CODE (XEXP (a, 0)) == REG
9360 || (GET_CODE (XEXP (a, 0)) == PLUS
9361 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9362 && (GET_CODE (XEXP (b, 0)) == REG
9363 || (GET_CODE (XEXP (b, 0)) == PLUS
9364 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9366 HOST_WIDE_INT val0 = 0, val1 = 0;
9370 if (GET_CODE (XEXP (a, 0)) == PLUS)
9372 reg0 = XEXP (XEXP (a, 0), 0);
9373 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9378 if (GET_CODE (XEXP (b, 0)) == PLUS)
9380 reg1 = XEXP (XEXP (b, 0), 0);
9381 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9386 /* Don't accept any offset that will require multiple
9387 instructions to handle, since this would cause the
9388 arith_adjacentmem pattern to output an overlong sequence. */
9389 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9392 /* Don't allow an eliminable register: register elimination can make
9393 the offset too large. */
9394 if (arm_eliminable_register (reg0))
9397 val_diff = val1 - val0;
9401 /* If the target has load delay slots, then there's no benefit
9402 to using an ldm instruction unless the offset is zero and
9403 we are optimizing for size. */
9404 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9405 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9406 && (val_diff == 4 || val_diff == -4));
9409 return ((REGNO (reg0) == REGNO (reg1))
9410 && (val_diff == 4 || val_diff == -4));
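/* Illustrative sketch (not GCC code) of the adjacency test above: two
   reg+offset addresses are adjacent when the base registers match and the
   offsets differ by exactly one word in either direction.  */
#if 0
static int
adjacent_p (int regno0, long val0, int regno1, long val1)
{
  long diff = val1 - val0;

  return regno0 == regno1 && (diff == 4 || diff == -4);
}
#endif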
9416 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9417 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9418 instruction. ADD_OFFSET is nonzero if the base address register needs
9419 to be modified with an add instruction before we can use it. */
9422 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9423 int nops, HOST_WIDE_INT add_offset)
9425 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9426 if the offset isn't small enough. The reason 2 ldrs are faster
9427 is because these ARMs are able to do more than one cache access
9428 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9429 whilst the ARM8 has a double bandwidth cache. This means that
9430 these cores can do both an instruction fetch and a data fetch in
9431 a single cycle, so the trick of calculating the address into a
9432 scratch register (one of the result regs) and then doing a load
9433 multiple actually becomes slower (and no smaller in code size).
9434 That is the transformation
9436 ldr rd1, [rbase + offset]
9437 ldr rd2, [rbase + offset + 4]
9441 add rd1, rbase, offset
9442 ldmia rd1, {rd1, rd2}
9444 produces worse code -- '3 cycles + any stalls on rd2' instead of
9445 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9446 access per cycle, the first sequence could never complete in less
9447 than 6 cycles, whereas the ldm sequence would only take 5 and
9448 would make better use of sequential accesses if not hitting the
9451 We cheat here and test 'arm_ld_sched' which we currently know to
9452 only be true for the ARM8, ARM9 and StrongARM. If this ever
9453 changes, then the test below needs to be reworked. */
9454 if (nops == 2 && arm_ld_sched && add_offset != 0)
9457 /* XScale has load-store double instructions, but they have stricter
9458 alignment requirements than load-store multiple, so we cannot use them.
9461 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9462 the pipeline until completion.
9470 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9479 Best case ldr will always win. However, the more ldr instructions
9480 we issue, the less likely we are to be able to schedule them well.
9481 Using ldr instructions also increases code size.
9483 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9484 for counts of 3 or 4 regs. */
9485 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9490 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9491 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9492 an array ORDER which describes the sequence to use when accessing the
9493 offsets that produces an ascending order. In this sequence, each
9494 offset must be larger by exactly 4 than the previous one. ORDER[0]
9495 must have been filled in with the lowest offset by the caller.
9496 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9497 we use to verify that ORDER produces an ascending order of registers.
9498 Return true if it was possible to construct such an order, false if
9502 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9506 for (i = 1; i < nops; i++)
9510 order[i] = order[i - 1];
9511 for (j = 0; j < nops; j++)
9512 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9514 /* We must find exactly one offset that is higher than the
9515 previous one by 4. */
9516 if (order[i] != order[i - 1])
9520 if (order[i] == order[i - 1])
9522 /* The register numbers must be ascending. */
9523 if (unsorted_regs != NULL
9524 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
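/* Illustrative sketch (not GCC code) of compute_offset_order over plain
   arrays: order[0] must be preseeded with the index of the smallest offset,
   and every step must then find exactly one offset that is larger than its
   predecessor by 4.  */
#if 0
static int
offsets_form_word_run (int nops, const long *offsets, int *order)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    if (order[i] != order[i - 1])
	      return 0;		/* more than one successor */
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return 0;		/* no successor at all */
    }
  return 1;
}
#endif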
9530 /* Used to determine in a peephole whether a sequence of load
9531 instructions can be changed into a load-multiple instruction.
9532 NOPS is the number of separate load instructions we are examining. The
9533 first NOPS entries in OPERANDS are the destination registers, the
9534 next NOPS entries are memory operands. If this function is
9535 successful, *BASE is set to the common base register of the memory
9536 accesses; *LOAD_OFFSET is set to the first memory location's offset
9537 from that base register.
9538 REGS is an array filled in with the destination register numbers.
9539 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9540 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9541 the sequence of registers in REGS matches the loads from ascending memory
9542 locations, and the function verifies that the register numbers are
9543 themselves ascending. If CHECK_REGS is false, the register numbers
9544 are stored in the order they are found in the operands. */
9546 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9547 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9549 int unsorted_regs[MAX_LDM_STM_OPS];
9550 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9551 int order[MAX_LDM_STM_OPS];
9552 rtx base_reg_rtx = NULL;
9556 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9557 easily extended if required. */
9558 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9560 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9562 /* Loop over the operands and check that the memory references are
9563 suitable (i.e. immediate offsets from the same base register). At
9564 the same time, extract the target register, and the memory offsets. */
9566 for (i = 0; i < nops; i++)
9571 /* Convert a subreg of a mem into the mem itself. */
9572 if (GET_CODE (operands[nops + i]) == SUBREG)
9573 operands[nops + i] = alter_subreg (operands + (nops + i));
9575 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9577 /* Don't reorder volatile memory references; it doesn't seem worth
9578 looking for the case where the order is ok anyway. */
9579 if (MEM_VOLATILE_P (operands[nops + i]))
9582 offset = const0_rtx;
9584 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9585 || (GET_CODE (reg) == SUBREG
9586 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9587 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9588 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9590 || (GET_CODE (reg) == SUBREG
9591 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9592 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9597 base_reg = REGNO (reg);
9599 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9602 else if (base_reg != (int) REGNO (reg))
9603 /* Not addressed from the same base register. */
9606 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9607 ? REGNO (operands[i])
9608 : REGNO (SUBREG_REG (operands[i])));
9610 /* If it isn't an integer register, or if it overwrites the
9611 base register but isn't the last insn in the list, then
9612 we can't do this. */
9613 if (unsorted_regs[i] < 0
9614 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9615 || unsorted_regs[i] > 14
9616 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9619 unsorted_offsets[i] = INTVAL (offset);
9620 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9624 /* Not a suitable memory address. */
9628 /* All the useful information has now been extracted from the
9629 operands into unsorted_regs and unsorted_offsets; additionally,
9630 order[0] has been set to the lowest offset in the list. Sort
9631 the offsets into order, verifying that they are adjacent, and
9632 check that the register numbers are ascending. */
9633 if (!compute_offset_order (nops, unsorted_offsets, order,
9634 check_regs ? unsorted_regs : NULL))
9638 memcpy (saved_order, order, sizeof order);
9644 for (i = 0; i < nops; i++)
9645 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9647 *load_offset = unsorted_offsets[order[0]];
9651 && !peep2_reg_dead_p (nops, base_reg_rtx))
9654 if (unsorted_offsets[order[0]] == 0)
9655 ldm_case = 1; /* ldmia */
9656 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9657 ldm_case = 2; /* ldmib */
9658 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9659 ldm_case = 3; /* ldmda */
9660 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9661 ldm_case = 4; /* ldmdb */
9662 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9663 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9668 if (!multiple_operation_profitable_p (false, nops,
9670 ? unsorted_offsets[order[0]] : 0))
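/* Worked example of the ldm_case selection above, for four registers with
   sorted offsets ascending by 4: {0,4,8,12} -> ldmia; {4,8,12,16} -> ldmib
   (ARM only); {-12,-8,-4,0} -> ldmda (ARM only); {-16,-12,-8,-4} -> ldmdb.
   Any other reachable start offset falls into case 5, where the callers
   first materialize base + offset with an add instruction.  */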
9676 /* Used to determine in a peephole whether a sequence of store instructions can
9677 be changed into a store-multiple instruction.
9678 NOPS is the number of separate store instructions we are examining.
9679 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9681 The first NOPS entries in OPERANDS are the source registers, the next
9682 NOPS entries are memory operands. If this function is successful, *BASE is
9683 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9684 to the first memory location's offset from that base register. REGS is an
9685 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9686 likewise filled with the corresponding rtx's.
9687 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9688 numbers to an ascending order of stores.
9689 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9690 from ascending memory locations, and the function verifies that the register
9691 numbers are themselves ascending. If CHECK_REGS is false, the register
9692 numbers are stored in the order they are found in the operands. */
9694 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9695 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9696 HOST_WIDE_INT *load_offset, bool check_regs)
9698 int unsorted_regs[MAX_LDM_STM_OPS];
9699 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9700 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9701 int order[MAX_LDM_STM_OPS];
9703 rtx base_reg_rtx = NULL;
9706 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9707 easily extended if required. */
9708 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9710 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9712 /* Loop over the operands and check that the memory references are
9713 suitable (i.e. immediate offsets from the same base register). At
9714 the same time, extract the target register, and the memory offsets. */
9716 for (i = 0; i < nops; i++)
9721 /* Convert a subreg of a mem into the mem itself. */
9722 if (GET_CODE (operands[nops + i]) == SUBREG)
9723 operands[nops + i] = alter_subreg (operands + (nops + i));
9725 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9727 /* Don't reorder volatile memory references; it doesn't seem worth
9728 looking for the case where the order is ok anyway. */
9729 if (MEM_VOLATILE_P (operands[nops + i]))
9732 offset = const0_rtx;
9734 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9735 || (GET_CODE (reg) == SUBREG
9736 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9737 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9738 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9740 || (GET_CODE (reg) == SUBREG
9741 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9742 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9745 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9746 ? operands[i] : SUBREG_REG (operands[i]));
9747 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9751 base_reg = REGNO (reg);
9753 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9756 else if (base_reg != (int) REGNO (reg))
9757 /* Not addressed from the same base register. */
9760 /* If it isn't an integer register, then we can't do this. */
9761 if (unsorted_regs[i] < 0
9762 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9763 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9764 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9765 || unsorted_regs[i] > 14)
9768 unsorted_offsets[i] = INTVAL (offset);
9769 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9773 /* Not a suitable memory address. */
9777 /* All the useful information has now been extracted from the
9778 operands into unsorted_regs and unsorted_offsets; additionally,
9779 order[0] has been set to the lowest offset in the list. Sort
9780 the offsets into order, verifying that they are adjacent, and
9781 check that the register numbers are ascending. */
9782 if (!compute_offset_order (nops, unsorted_offsets, order,
9783 check_regs ? unsorted_regs : NULL))
9787 memcpy (saved_order, order, sizeof order);
9793 for (i = 0; i < nops; i++)
9795 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9797 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9800 *load_offset = unsorted_offsets[order[0]];
9804 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9807 if (unsorted_offsets[order[0]] == 0)
9808 stm_case = 1; /* stmia */
9809 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9810 stm_case = 2; /* stmib */
9811 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9812 stm_case = 3; /* stmda */
9813 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9814 stm_case = 4; /* stmdb */
9818 if (!multiple_operation_profitable_p (false, nops, 0))
9824 /* Routines for use in generating RTL. */
9826 /* Generate a load-multiple instruction. COUNT is the number of loads in
9827 the instruction; REGS and MEMS are arrays containing the operands.
9828 BASEREG is the base register to be used in addressing the memory operands.
9829 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9833 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9834 HOST_WIDE_INT wback_offset)
9839 if (!multiple_operation_profitable_p (false, count, 0))
9845 for (i = 0; i < count; i++)
9846 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9848 if (wback_offset != 0)
9849 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9857 result = gen_rtx_PARALLEL (VOIDmode,
9858 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9859 if (wback_offset != 0)
9861 XVECEXP (result, 0, 0)
9862 = gen_rtx_SET (VOIDmode, basereg,
9863 plus_constant (basereg, wback_offset));
9868 for (j = 0; i < count; i++, j++)
9869 XVECEXP (result, 0, i)
9870 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
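/* For example, with count == 2, writeback, and mems at basereg + 0 and
   basereg + 4 (as built by arm_gen_multiple_op), the PARALLEL constructed
   above has roughly this shape (register numbers hypothetical):

     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
		(set (reg r4) (mem (reg rb)))
		(set (reg r5) (mem (plus (reg rb) (const_int 4))))])  */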
9875 /* Generate a store-multiple instruction. COUNT is the number of stores in
9876 the instruction; REGS and MEMS are arrays containing the operands.
9877 BASEREG is the base register to be used in addressing the memory operands.
9878 WBACK_OFFSET is nonzero if the instruction should update the base register. */
9882 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9883 HOST_WIDE_INT wback_offset)
9888 if (GET_CODE (basereg) == PLUS)
9889 basereg = XEXP (basereg, 0);
9891 if (!multiple_operation_profitable_p (false, count, 0))
9897 for (i = 0; i < count; i++)
9898 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9900 if (wback_offset != 0)
9901 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9909 result = gen_rtx_PARALLEL (VOIDmode,
9910 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9911 if (wback_offset != 0)
9913 XVECEXP (result, 0, 0)
9914 = gen_rtx_SET (VOIDmode, basereg,
9915 plus_constant (basereg, wback_offset));
9920 for (j = 0; i < count; i++, j++)
9921 XVECEXP (result, 0, i)
9922 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9927 /* Generate either a load-multiple or a store-multiple instruction. This
9928 function can be used in situations where we can start with a single MEM
9929 rtx and adjust its address upwards.
9930 COUNT is the number of operations in the instruction, not counting a
9931 possible update of the base register. REGS is an array containing the register numbers to be loaded or stored.
9933 BASEREG is the base register to be used in addressing the memory operands,
9934 which are constructed from BASEMEM.
9935 WRITE_BACK specifies whether the generated instruction should include an
9936 update of the base register.
9937 OFFSETP is used to pass an offset to and from this function; this offset
9938 is not used when constructing the address (instead BASEMEM should have an
9939 appropriate offset in its address), it is used only for setting
9940 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9943 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9944 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9946 rtx mems[MAX_LDM_STM_OPS];
9947 HOST_WIDE_INT offset = *offsetp;
9950 gcc_assert (count <= MAX_LDM_STM_OPS);
9952 if (GET_CODE (basereg) == PLUS)
9953 basereg = XEXP (basereg, 0);
9955 for (i = 0; i < count; i++)
9957 rtx addr = plus_constant (basereg, i * 4);
9958 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9966 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9967 write_back ? 4 * count : 0);
9969 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9970 write_back ? 4 * count : 0);
9974 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9975 rtx basemem, HOST_WIDE_INT *offsetp)
9977 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9982 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9983 rtx basemem, HOST_WIDE_INT *offsetp)
9985 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9989 /* Called from a peephole2 expander to turn a sequence of loads into an
9990 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9991 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9992 is true if we can reorder the registers because they are used commutatively subsequently.
9994 Returns true iff we could generate a new instruction. */
9997 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9999 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10000 rtx mems[MAX_LDM_STM_OPS];
10001 int i, j, base_reg;
10003 HOST_WIDE_INT offset;
10004 int write_back = FALSE;
10008 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10009 &base_reg, &offset, !sort_regs);
10015 for (i = 0; i < nops - 1; i++)
10016 for (j = i + 1; j < nops; j++)
10017 if (regs[i] > regs[j])
10023 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10027 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10028 gcc_assert (ldm_case == 1 || ldm_case == 5);
10034 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10035 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10037 if (!TARGET_THUMB1)
10039 base_reg = regs[0];
10040 base_reg_rtx = newbase;
10044 for (i = 0; i < nops; i++)
10046 addr = plus_constant (base_reg_rtx, offset + i * 4);
10047 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10050 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10051 write_back ? offset + i * 4 : 0));
10055 /* Called from a peephole2 expander to turn a sequence of stores into an
10056 STM instruction. OPERANDS are the operands found by the peephole matcher;
10057 NOPS indicates how many separate stores we are trying to combine.
10058 Returns true iff we could generate a new instruction. */
10061 gen_stm_seq (rtx *operands, int nops)
10064 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10065 rtx mems[MAX_LDM_STM_OPS];
10068 HOST_WIDE_INT offset;
10069 int write_back = FALSE;
10072 bool base_reg_dies;
10074 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10075 mem_order, &base_reg, &offset, true);
10080 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10082 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10085 gcc_assert (base_reg_dies);
10091 gcc_assert (base_reg_dies);
10092 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10096 addr = plus_constant (base_reg_rtx, offset);
10098 for (i = 0; i < nops; i++)
10100 addr = plus_constant (base_reg_rtx, offset + i * 4);
10101 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10104 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10105 write_back ? offset + i * 4 : 0));
10109 /* Called from a peephole2 expander to turn a sequence of stores that are
10110 preceded by constant loads into an STM instruction. OPERANDS are the
10111 operands found by the peephole matcher; NOPS indicates how many
10112 separate stores we are trying to combine; there are 2 * NOPS
10113 instructions in the peephole.
10114 Returns true iff we could generate a new instruction. */
10117 gen_const_stm_seq (rtx *operands, int nops)
10119 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10120 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10121 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10122 rtx mems[MAX_LDM_STM_OPS];
10125 HOST_WIDE_INT offset;
10126 int write_back = FALSE;
10129 bool base_reg_dies;
10131 HARD_REG_SET allocated;
10133 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10134 mem_order, &base_reg, &offset, false);
10139 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10141 /* If the same register is used more than once, try to find a free register. */
10143 CLEAR_HARD_REG_SET (allocated);
10144 for (i = 0; i < nops; i++)
10146 for (j = i + 1; j < nops; j++)
10147 if (regs[i] == regs[j])
10149 rtx t = peep2_find_free_register (0, nops * 2,
10150 TARGET_THUMB1 ? "l" : "r",
10151 SImode, &allocated);
10155 regs[i] = REGNO (t);
10159 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10162 for (i = 0; i < nops; i++)
10163 if (regs[i] < regs[reg_order[0]])
10166 for (i = 1; i < nops; i++)
10168 int this_order = reg_order[i - 1];
10169 for (j = 0; j < nops; j++)
10170 if (regs[j] > regs[reg_order[i - 1]]
10171 && (this_order == reg_order[i - 1]
10172 || regs[j] < regs[this_order]))
10174 reg_order[i] = this_order;
10177 /* Ensure that registers that must be live after the instruction end
10178 up with the correct value. */
10179 for (i = 0; i < nops; i++)
10181 int this_order = reg_order[i];
10182 if ((this_order != mem_order[i]
10183 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10184 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10188 /* Load the constants. */
10189 for (i = 0; i < nops; i++)
10191 rtx op = operands[2 * nops + mem_order[i]];
10192 sorted_regs[i] = regs[reg_order[i]];
10193 emit_move_insn (reg_rtxs[reg_order[i]], op);
10196 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10198 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10201 gcc_assert (base_reg_dies);
10207 gcc_assert (base_reg_dies);
10208 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10212 addr = plus_constant (base_reg_rtx, offset);
10214 for (i = 0; i < nops; i++)
10216 addr = plus_constant (base_reg_rtx, offset + i * 4);
10217 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10220 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10221 write_back ? offset + i * 4 : 0));
10226 arm_gen_movmemqi (rtx *operands)
10228 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10229 HOST_WIDE_INT srcoffset, dstoffset;
10231 rtx src, dst, srcbase, dstbase;
10232 rtx part_bytes_reg = NULL;
10235 if (GET_CODE (operands[2]) != CONST_INT
10236 || GET_CODE (operands[3]) != CONST_INT
10237 || INTVAL (operands[2]) > 64
10238 || INTVAL (operands[3]) & 3)
10241 dstbase = operands[0];
10242 srcbase = operands[1];
10244 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10245 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10247 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10248 out_words_to_go = INTVAL (operands[2]) / 4;
10249 last_bytes = INTVAL (operands[2]) & 3;
10250 dstoffset = srcoffset = 0;
10252 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10253 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10255 for (i = 0; in_words_to_go >= 2; i+=4)
10257 if (in_words_to_go > 4)
10258 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10259 TRUE, srcbase, &srcoffset));
10261 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10262 src, FALSE, srcbase,
10265 if (out_words_to_go)
10267 if (out_words_to_go > 4)
10268 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10269 TRUE, dstbase, &dstoffset));
10270 else if (out_words_to_go != 1)
10271 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10272 out_words_to_go, dst,
10275 dstbase, &dstoffset));
10278 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10279 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10280 if (last_bytes != 0)
10282 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10288 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10289 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10292 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10293 if (out_words_to_go)
10297 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10298 sreg = copy_to_reg (mem);
10300 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10301 emit_move_insn (mem, sreg);
10304 gcc_assert (!in_words_to_go); /* Sanity check */
10307 if (in_words_to_go)
10309 gcc_assert (in_words_to_go > 0);
10311 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10312 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10315 gcc_assert (!last_bytes || part_bytes_reg);
10317 if (BYTES_BIG_ENDIAN && last_bytes)
10319 rtx tmp = gen_reg_rtx (SImode);
10321 /* The bytes we want are in the top end of the word. */
10322 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10323 GEN_INT (8 * (4 - last_bytes))));
10324 part_bytes_reg = tmp;
10328 mem = adjust_automodify_address (dstbase, QImode,
10329 plus_constant (dst, last_bytes - 1),
10330 dstoffset + last_bytes - 1);
10331 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10335 tmp = gen_reg_rtx (SImode);
10336 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10337 part_bytes_reg = tmp;
10344 if (last_bytes > 1)
10346 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10347 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10351 rtx tmp = gen_reg_rtx (SImode);
10352 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10353 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10354 part_bytes_reg = tmp;
10361 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10362 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
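/* Illustrative sketch (not GCC code) of the overall copy shape expanded
   above: whole words first (the load/store-multiple groups), then a
   halfword and/or byte tail for the final 1-3 bytes.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
copy_words_then_tail (unsigned char *dst, const unsigned char *src, size_t n)
{
  size_t last_bytes = n & 3;

  memcpy (dst, src, n - last_bytes);	/* the word-sized part */
  dst += n - last_bytes;
  src += n - last_bytes;

  if (last_bytes & 2)
    {
      memcpy (dst, src, 2);		/* halfword store */
      dst += 2;
      src += 2;
    }
  if (last_bytes & 1)
    *dst = *src;			/* final byte store */
}
#endif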
10369 /* Select a dominance comparison mode if possible for a test of the general
10370 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10371 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10372 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10373 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10374 In all cases OP will be either EQ or NE, but we don't need to know which
10375 here. If we are unable to support a dominance comparison we return
10376 CC mode. This will then fail to match for the RTL expressions that
10377 generate this call. */
10379 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10381 enum rtx_code cond1, cond2;
10384 /* Currently we will probably get the wrong result if the individual
10385 comparisons are not simple. This also ensures that it is safe to
10386 reverse a comparison if necessary. */
10387 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10389 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10393 /* The if_then_else variant of this tests the second condition if the
10394 first passes, but is true if the first fails. Reverse the first
10395 condition to get a true "inclusive-or" expression. */
10396 if (cond_or == DOM_CC_NX_OR_Y)
10397 cond1 = reverse_condition (cond1);
10399 /* If the comparisons are not equal, and one doesn't dominate the other,
10400 then we can't do this. */
10402 && !comparison_dominates_p (cond1, cond2)
10403 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10408 enum rtx_code temp = cond1;
10416 if (cond_or == DOM_CC_X_AND_Y)
10421 case EQ: return CC_DEQmode;
10422 case LE: return CC_DLEmode;
10423 case LEU: return CC_DLEUmode;
10424 case GE: return CC_DGEmode;
10425 case GEU: return CC_DGEUmode;
10426 default: gcc_unreachable ();
10430 if (cond_or == DOM_CC_X_AND_Y)
10442 gcc_unreachable ();
10446 if (cond_or == DOM_CC_X_AND_Y)
10458 gcc_unreachable ();
10462 if (cond_or == DOM_CC_X_AND_Y)
10463 return CC_DLTUmode;
10468 return CC_DLTUmode;
10470 return CC_DLEUmode;
10474 gcc_unreachable ();
10478 if (cond_or == DOM_CC_X_AND_Y)
10479 return CC_DGTUmode;
10484 return CC_DGTUmode;
10486 return CC_DGEUmode;
10490 gcc_unreachable ();
10493 /* The remaining cases only occur when both comparisons are the same. */
10496 gcc_assert (cond1 == cond2);
10500 gcc_assert (cond1 == cond2);
10504 gcc_assert (cond1 == cond2);
10508 gcc_assert (cond1 == cond2);
10509 return CC_DLEUmode;
10512 gcc_assert (cond1 == cond2);
10513 return CC_DGEUmode;
10516 gcc_unreachable ();
10521 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10523 /* All floating point compares return CCFP if it is an equality
10524 comparison, and CCFPE otherwise. */
10525 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10545 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10550 gcc_unreachable ();
10554 /* A compare with a shifted operand. Because of canonicalization, the
10555 comparison will have to be swapped when we emit the assembler. */
10556 if (GET_MODE (y) == SImode
10557 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10558 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10559 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10560 || GET_CODE (x) == ROTATERT))
10563 /* This operation is performed swapped, but since we only rely on the Z
10564 flag we don't need an additional mode. */
10565 if (GET_MODE (y) == SImode
10566 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10567 && GET_CODE (x) == NEG
10568 && (op == EQ || op == NE))
10571 /* This is a special case that is used by combine to allow a
10572 comparison of a shifted byte load to be split into a zero-extend
10573 followed by a comparison of the shifted integer (only valid for
10574 equalities and unsigned inequalities). */
10575 if (GET_MODE (x) == SImode
10576 && GET_CODE (x) == ASHIFT
10577 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10578 && GET_CODE (XEXP (x, 0)) == SUBREG
10579 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10580 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10581 && (op == EQ || op == NE
10582 || op == GEU || op == GTU || op == LTU || op == LEU)
10583 && GET_CODE (y) == CONST_INT)
10586 /* A construct for a conditional compare, if the false arm contains
10587 0, then both conditions must be true, otherwise either condition
10588 must be true. Not all conditions are possible, so CCmode is
10589 returned if it can't be done. */
10590 if (GET_CODE (x) == IF_THEN_ELSE
10591 && (XEXP (x, 2) == const0_rtx
10592 || XEXP (x, 2) == const1_rtx)
10593 && COMPARISON_P (XEXP (x, 0))
10594 && COMPARISON_P (XEXP (x, 1)))
10595 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10596 INTVAL (XEXP (x, 2)));
10598 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10599 if (GET_CODE (x) == AND
10600 && COMPARISON_P (XEXP (x, 0))
10601 && COMPARISON_P (XEXP (x, 1)))
10602 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10605 if (GET_CODE (x) == IOR
10606 && COMPARISON_P (XEXP (x, 0))
10607 && COMPARISON_P (XEXP (x, 1)))
10608 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10611 /* An operation (on Thumb) where we want to test for a single bit.
10612 This is done by shifting that bit up into the top bit of a
10613 scratch register; we can then branch on the sign bit. */
10615 && GET_MODE (x) == SImode
10616 && (op == EQ || op == NE)
10617 && GET_CODE (x) == ZERO_EXTRACT
10618 && XEXP (x, 1) == const1_rtx)
10621 /* An operation that sets the condition codes as a side-effect, the
10622 V flag is not set correctly, so we can only use comparisons where
10623 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
10625 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10626 if (GET_MODE (x) == SImode
10628 && (op == EQ || op == NE || op == LT || op == GE)
10629 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10630 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10631 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10632 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10633 || GET_CODE (x) == LSHIFTRT
10634 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10635 || GET_CODE (x) == ROTATERT
10636 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10637 return CC_NOOVmode;
10639 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10642 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10643 && GET_CODE (x) == PLUS
10644 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10647 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10649 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10651 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10658 /* A DImode comparison against zero can be implemented by
10659 or'ing the two halves together. */
10660 if (y == const0_rtx)
10663 /* We can do an equality test in three Thumb instructions. */
10673 /* DImode unsigned comparisons can be implemented by cmp +
10674 cmpeq without a scratch register. Not worth doing in Thumb-2. */
10685 /* DImode signed and unsigned comparisons can be implemented
10686 by cmp + sbcs with a scratch register, but that does not
10687 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10688 gcc_assert (op != EQ && op != NE);
10692 gcc_unreachable ();
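/* Illustrative sketch (not GCC code) of the idea behind the DImode
   cmp + cmpeq sequence mentioned above: the high words decide an unsigned
   comparison unless they are equal, in which case the low words decide.  */
#if 0
static int
unsigned_less_di (unsigned long xhi, unsigned long xlo,
		  unsigned long yhi, unsigned long ylo)
{
  if (xhi != yhi)
    return xhi < yhi;
  return xlo < ylo;
}
#endif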
10699 /* X and Y are two things to compare using CODE. Emit the compare insn and
10700 return the rtx for register 0 in the proper mode. FP means this is a
10701 floating point compare: I don't think that it is needed on the arm. */
10703 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10705 enum machine_mode mode;
10707 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10709 /* We might have X as a constant, Y as a register because of the predicates
10710 used for cmpdi. If so, force X to a register here. */
10711 if (dimode_comparison && !REG_P (x))
10712 x = force_reg (DImode, x);
10714 mode = SELECT_CC_MODE (code, x, y);
10715 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10717 if (dimode_comparison
10718 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10719 && mode != CC_CZmode)
10723 /* To compare two non-zero values for equality, XOR them and
10724 then compare against zero. Not used for ARM mode; there
10725 CC_CZmode is cheaper. */
10726 if (mode == CC_Zmode && y != const0_rtx)
10728 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10731 /* A scratch register is required. */
10732 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10733 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10734 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10737 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10742 /* Generate a sequence of insns that will generate the correct return
10743 address mask depending on the physical architecture that the program is running on. */
10746 arm_gen_return_addr_mask (void)
10748 rtx reg = gen_reg_rtx (Pmode);
10750 emit_insn (gen_return_addr_mask (reg));
10755 arm_reload_in_hi (rtx *operands)
10757 rtx ref = operands[1];
10759 HOST_WIDE_INT offset = 0;
10761 if (GET_CODE (ref) == SUBREG)
10763 offset = SUBREG_BYTE (ref);
10764 ref = SUBREG_REG (ref);
10767 if (GET_CODE (ref) == REG)
10769 /* We have a pseudo which has been spilt onto the stack; there
10770 are two cases here: the first where there is a simple
10771 stack-slot replacement and a second where the stack-slot is
10772 out of range, or is used as a subreg. */
10773 if (reg_equiv_mem[REGNO (ref)])
10775 ref = reg_equiv_mem[REGNO (ref)];
10776 base = find_replacement (&XEXP (ref, 0));
10779 /* The slot is out of range, or was dressed up in a SUBREG. */
10780 base = reg_equiv_address[REGNO (ref)];
10783 base = find_replacement (&XEXP (ref, 0));
10785 /* Handle the case where the address is too complex to be offset by 1. */
10786 if (GET_CODE (base) == MINUS
10787 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10789 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10791 emit_set_insn (base_plus, base);
10794 else if (GET_CODE (base) == PLUS)
10796 /* The addend must be CONST_INT, or we would have dealt with it above. */
10797 HOST_WIDE_INT hi, lo;
10799 offset += INTVAL (XEXP (base, 1));
10800 base = XEXP (base, 0);
10802 /* Rework the address into a legal sequence of insns. */
10803 /* Valid range for lo is -4095 -> 4095 */
10806 : -((-offset) & 0xfff));
10808 /* Corner case, if lo is the max offset then we would be out of range
10809 once we have added the additional 1 below, so bump the msb into the
10810 pre-loading insn(s). */
10814 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10815 ^ (HOST_WIDE_INT) 0x80000000)
10816 - (HOST_WIDE_INT) 0x80000000);
10818 gcc_assert (hi + lo == offset);
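      /* Worked examples of the split (offsets hypothetical):
	 offset 0x1234 gives lo = 0x234, hi = 0x1000;
	 offset -5 gives lo = -5, hi = 0;
	 offset 4095 hits the corner case above, giving lo = 0x7ff and
	 hi = 0x800.  In every case hi + lo == offset.  */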
10822 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10824 /* Get the base address; addsi3 knows how to handle constants
10825 that require more than one insn. */
10826 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10832 /* Operands[2] may overlap operands[0] (though it won't overlap
10833 operands[1]), that's why we asked for a DImode reg -- so we can
10834 use the bit that does not overlap. */
10835 if (REGNO (operands[2]) == REGNO (operands[0]))
10836 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10838 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10840 emit_insn (gen_zero_extendqisi2 (scratch,
10841 gen_rtx_MEM (QImode,
10842 plus_constant (base,
10844 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10845 gen_rtx_MEM (QImode,
10846 plus_constant (base,
10848 if (!BYTES_BIG_ENDIAN)
10849 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10850 gen_rtx_IOR (SImode,
10853 gen_rtx_SUBREG (SImode, operands[0], 0),
10857 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10858 gen_rtx_IOR (SImode,
10859 gen_rtx_ASHIFT (SImode, scratch,
10861 gen_rtx_SUBREG (SImode, operands[0], 0)));
10864 /* Handle storing a half-word to memory during reload by synthesizing as two
10865 byte stores. Take care not to clobber the input values until after we
10866 have moved them somewhere safe. This code assumes that if the DImode
10867 scratch in operands[2] overlaps either the input value or output address
10868 in some way, then that value must die in this insn (we absolutely need
10869 two scratch registers for some corner cases). */
10871 arm_reload_out_hi (rtx *operands)
10873 rtx ref = operands[0];
10874 rtx outval = operands[1];
10876 HOST_WIDE_INT offset = 0;
10878 if (GET_CODE (ref) == SUBREG)
10880 offset = SUBREG_BYTE (ref);
10881 ref = SUBREG_REG (ref);
10884 if (GET_CODE (ref) == REG)
10886 /* We have a pseudo which has been spilt onto the stack; there
10887 are two cases here: the first where there is a simple
10888 stack-slot replacement and a second where the stack-slot is
10889 out of range, or is used as a subreg. */
10890 if (reg_equiv_mem[REGNO (ref)])
10892 ref = reg_equiv_mem[REGNO (ref)];
10893 base = find_replacement (&XEXP (ref, 0));
10896 /* The slot is out of range, or was dressed up in a SUBREG. */
10897 base = reg_equiv_address[REGNO (ref)];
10900 base = find_replacement (&XEXP (ref, 0));
10902 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10904 /* Handle the case where the address is too complex to be offset by 1. */
10905 if (GET_CODE (base) == MINUS
10906 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10908 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10910 /* Be careful not to destroy OUTVAL. */
10911 if (reg_overlap_mentioned_p (base_plus, outval))
10913 /* Updating base_plus might destroy outval, see if we can
10914 swap the scratch and base_plus. */
10915 if (!reg_overlap_mentioned_p (scratch, outval))
10918 scratch = base_plus;
10923 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10925 /* Be conservative and copy OUTVAL into the scratch now,
10926 this should only be necessary if outval is a subreg
10927 of something larger than a word. */
10928 /* XXX Might this clobber base? I can't see how it can,
10929 since scratch is known to overlap with OUTVAL, and
10930 must be wider than a word. */
10931 emit_insn (gen_movhi (scratch_hi, outval));
10932 outval = scratch_hi;
10936 emit_set_insn (base_plus, base);
10939 else if (GET_CODE (base) == PLUS)
10941 /* The addend must be CONST_INT, or we would have dealt with it above. */
10942 HOST_WIDE_INT hi, lo;
10944 offset += INTVAL (XEXP (base, 1));
10945 base = XEXP (base, 0);
10947 /* Rework the address into a legal sequence of insns. */
10948 /* Valid range for lo is -4095 -> 4095 */
10951 : -((-offset) & 0xfff));
10953 /* Corner case, if lo is the max offset then we would be out of range
10954 once we have added the additional 1 below, so bump the msb into the
10955 pre-loading insn(s). */
10959 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10960 ^ (HOST_WIDE_INT) 0x80000000)
10961 - (HOST_WIDE_INT) 0x80000000);
10963 gcc_assert (hi + lo == offset);
10967 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10969 /* Be careful not to destroy OUTVAL. */
10970 if (reg_overlap_mentioned_p (base_plus, outval))
10972 /* Updating base_plus might destroy outval, see if we
10973 can swap the scratch and base_plus. */
10974 if (!reg_overlap_mentioned_p (scratch, outval))
10977 scratch = base_plus;
10982 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10984 /* Be conservative and copy outval into scratch now,
10985 this should only be necessary if outval is a
10986 subreg of something larger than a word. */
10987 /* XXX Might this clobber base? I can't see how it
10988 can, since scratch is known to overlap with
10990 emit_insn (gen_movhi (scratch_hi, outval));
10991 outval = scratch_hi;
10995 /* Get the base address; addsi3 knows how to handle constants
10996 that require more than one insn. */
10997 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11003 if (BYTES_BIG_ENDIAN)
11005 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11006 plus_constant (base, offset + 1)),
11007 gen_lowpart (QImode, outval)));
11008 emit_insn (gen_lshrsi3 (scratch,
11009 gen_rtx_SUBREG (SImode, outval, 0),
11011 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11012 gen_lowpart (QImode, scratch)));
11016 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11017 gen_lowpart (QImode, outval)));
11018 emit_insn (gen_lshrsi3 (scratch,
11019 gen_rtx_SUBREG (SImode, outval, 0),
11021 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11022 plus_constant (base, offset + 1)),
11023 gen_lowpart (QImode, scratch)));
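/* Illustrative sketch (not GCC code) of what the two byte moves above
   amount to: on big-endian targets the most significant byte goes to the
   lower address, on little-endian the least significant one does.  */
#if 0
static void
store_halfword_bytewise (unsigned char *p, unsigned short v, int big_endian)
{
  if (big_endian)
    {
      p[0] = v >> 8;
      p[1] = v & 0xff;
    }
  else
    {
      p[0] = v & 0xff;
      p[1] = v >> 8;
    }
}
#endif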
11027 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11028 (padded to the size of a word) should be passed in a register. */
11031 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11033 if (TARGET_AAPCS_BASED)
11034 return must_pass_in_stack_var_size (mode, type);
11036 return must_pass_in_stack_var_size_or_pad (mode, type);
11040 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11041 Return true if an argument passed on the stack should be padded upwards,
11042 i.e. if the least-significant byte has useful data.
11043 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11044 aggregate types are placed in the lowest memory address. */
11047 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11049 if (!TARGET_AAPCS_BASED)
11050 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11052 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11059 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11060 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11061 byte of the register has useful data, and return the opposite if the
11062 most significant byte does.
11063 For AAPCS, small aggregates and small complex types are always padded
11067 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11068 tree type, int first ATTRIBUTE_UNUSED)
11070 if (TARGET_AAPCS_BASED
11071 && BYTES_BIG_ENDIAN
11072 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11073 && int_size_in_bytes (type) <= 4)
11076 /* Otherwise, use default padding. */
11077 return !BYTES_BIG_ENDIAN;
11081 /* Print a symbolic form of X to the debug file, F. */
11083 arm_print_value (FILE *f, rtx x)
11085 switch (GET_CODE (x))
11088 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11092 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11100 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11102 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11103 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11111 fprintf (f, "\"%s\"", XSTR (x, 0));
11115 fprintf (f, "`%s'", XSTR (x, 0));
11119 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11123 arm_print_value (f, XEXP (x, 0));
11127 arm_print_value (f, XEXP (x, 0));
11129 arm_print_value (f, XEXP (x, 1));
11137 fprintf (f, "????");
11142 /* Routines for manipulation of the constant pool. */
11144 /* Arm instructions cannot load a large constant directly into a
11145 register; they have to come from a pc relative load. The constant
11146 must therefore be placed in the addressable range of the pc
11147 relative load. Depending on the precise pc relative load
11148 instruction the range is somewhere between 256 bytes and 4k. This
11149 means that we often have to dump a constant inside a function, and
11150 generate code to branch around it.
11152 It is important to minimize this, since the branches will slow
11153 things down and make the code larger.
11155 Normally we can hide the table after an existing unconditional
11156 branch so that there is no interruption of the flow, but in the
11157 worst case the code looks like this:
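(a schematic sketch only; rn stands for any core register and
L1..L4 for local pool labels)

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long	value
	L4: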
11175 We fix this by performing a scan after scheduling, which notices
11176 which instructions need to have their operands fetched from the
11177 constant table and builds the table.
11179 The algorithm starts by building a table of all the constants that
11180 need fixing up and all the natural barriers in the function (places
11181 where a constant table can be dropped without breaking the flow).
11182 For each fixup we note how far the pc-relative replacement will be
11183 able to reach and the offset of the instruction into the function.
11185 Having built the table we then group the fixes together to form
11186 tables that are as large as possible (subject to addressing
11187 constraints) and emit each table of constants after the last
11188 barrier that is within range of all the instructions in the group.
11189 If a group does not contain a barrier, then we forcibly create one
11190 by inserting a jump instruction into the flow. Once the table has
11191 been inserted, the insns are then modified to reference the
11192 relevant entry in the pool.
11194 Possible enhancements to the algorithm (not implemented) are:
11196 1) For some processors and object formats, there may be benefit in
11197 aligning the pools to the start of cache lines; this alignment
11198 would need to be taken into account when calculating addressability
11201 /* These typedefs are located at the start of this file, so that
11202 they can be used in the prototypes there. This comment is to
11203 remind readers of that fact so that the following structures
11204 can be understood more easily.
11206 typedef struct minipool_node Mnode;
11207 typedef struct minipool_fixup Mfix; */
11209 struct minipool_node
11211 /* Doubly linked chain of entries. */
11212 Mnode * next;
11213 Mnode * prev;
11214 /* The maximum offset into the code that this entry can be placed. While
11215 pushing fixes for forward references, all entries are sorted in order
11216 of increasing max_address. */
11217 HOST_WIDE_INT max_address;
11218 /* Similarly for an entry inserted for a backwards ref. */
11219 HOST_WIDE_INT min_address;
11220 /* The number of fixes referencing this entry. This can become zero
11221 if we "unpush" an entry. In this case we ignore the entry when we
11222 come to emit the code. */
11223 int refcount;
11224 /* The offset from the start of the minipool. */
11225 HOST_WIDE_INT offset;
11226 /* The value in the table. */
11227 rtx value;
11228 /* The mode of value. */
11229 enum machine_mode mode;
11230 /* The size of the value. With iWMMXt enabled,
11231 sizes > 4 also imply an alignment of 8 bytes. */
11232 int fix_size;
11233 };
11235 struct minipool_fixup
11236 {
11237 Mfix * next;
11238 rtx insn;
11239 HOST_WIDE_INT address;
11240 rtx * loc;
11241 enum machine_mode mode;
11242 int fix_size;
11243 rtx value;
11244 Mnode * minipool;
11245 HOST_WIDE_INT forwards;
11246 HOST_WIDE_INT backwards;
11247 };
11249 /* Fixes less than a word need padding out to a word boundary. */
11250 #define MINIPOOL_FIX_SIZE(mode) \
11251 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
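/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte value padded
   out to a full word), while MINIPOOL_FIX_SIZE (DImode) stays 8. */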
11253 static Mnode * minipool_vector_head;
11254 static Mnode * minipool_vector_tail;
11255 static rtx minipool_vector_label;
11256 static int minipool_pad;
11258 /* The linked list of all minipool fixes required for this function. */
11259 Mfix * minipool_fix_head;
11260 Mfix * minipool_fix_tail;
11261 /* The fix entry for the current minipool, once it has been placed. */
11262 Mfix * minipool_barrier;
11264 /* Determines if INSN is the start of a jump table. Returns the end
11265 of the TABLE or NULL_RTX. */
11267 is_jump_table (rtx insn)
11271 if (GET_CODE (insn) == JUMP_INSN
11272 && JUMP_LABEL (insn) != NULL
11273 && ((table = next_real_insn (JUMP_LABEL (insn)))
11274 == next_real_insn (insn))
11275 && table != NULL
11276 && GET_CODE (table) == JUMP_INSN
11277 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11278 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11279 return table;
11281 return NULL_RTX;
11284 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11285 #define JUMP_TABLES_IN_TEXT_SECTION 0
11286 #endif
11288 static HOST_WIDE_INT
11289 get_jump_table_size (rtx insn)
11291 /* ADDR_VECs only take room if read-only data goes into the text
11292 section. */
11293 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11295 rtx body = PATTERN (insn);
11296 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11297 HOST_WIDE_INT size;
11298 HOST_WIDE_INT modesize;
11300 modesize = GET_MODE_SIZE (GET_MODE (body));
11301 size = modesize * XVECLEN (body, elt);
11302 switch (modesize)
11303 {
11304 case 1:
11305 /* Round up size of TBB table to a halfword boundary. */
11306 size = (size + 1) & ~(HOST_WIDE_INT)1;
11307 break;
11308 case 2:
11309 /* No padding necessary for TBH. */
11310 break;
11311 case 4:
11312 /* Add two bytes for alignment on Thumb. */
11313 if (TARGET_THUMB)
11314 size += 2;
11315 break;
11316 default:
11317 gcc_unreachable ();
11318 }
11319 return size;
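/* As a worked example, a Thumb-2 TBB dispatch table (modesize 1) with
   seven entries occupies 7 bytes, rounded up to 8 by the halfword
   alignment above, while a TBH table (modesize 2) with seven entries
   occupies exactly 14 bytes. */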
11325 /* Move a minipool fix MP from its current location to before MAX_MP.
11326 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11327 constraints may need updating. */
11329 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11330 HOST_WIDE_INT max_address)
11332 /* The code below assumes these are different. */
11333 gcc_assert (mp != max_mp);
11335 if (max_mp == NULL)
11337 if (max_address < mp->max_address)
11338 mp->max_address = max_address;
11342 if (max_address > max_mp->max_address - mp->fix_size)
11343 mp->max_address = max_mp->max_address - mp->fix_size;
11345 mp->max_address = max_address;
11347 /* Unlink MP from its current position. Since max_mp is non-null,
11348 mp->prev must be non-null. */
11349 mp->prev->next = mp->next;
11350 if (mp->next != NULL)
11351 mp->next->prev = mp->prev;
11353 minipool_vector_tail = mp->prev;
11355 /* Re-insert it before MAX_MP. */
11357 mp->prev = max_mp->prev;
11360 if (mp->prev != NULL)
11361 mp->prev->next = mp;
11363 minipool_vector_head = mp;
11366 /* Save the new entry. */
11369 /* Scan over the preceding entries and adjust their addresses as
11370 required. */
11371 while (mp->prev != NULL
11372 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11374 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11381 /* Add a constant to the minipool for a forward reference. Returns the
11382 node added or NULL if the constant will not fit in this pool. */
11384 add_minipool_forward_ref (Mfix *fix)
11386 /* If set, max_mp is the first pool_entry that has a lower
11387 constraint than the one we are trying to add. */
11388 Mnode * max_mp = NULL;
11389 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11392 /* If the minipool starts before the end of FIX->INSN then this FIX
11393 cannot be placed into the current pool. Furthermore, adding the
11394 new constant pool entry may cause the pool to start FIX_SIZE bytes
11395 earlier. */
11396 if (minipool_vector_head &&
11397 (fix->address + get_attr_length (fix->insn)
11398 >= minipool_vector_head->max_address - fix->fix_size))
11401 /* Scan the pool to see if a constant with the same value has
11402 already been added. While we are doing this, also note the
11403 location where we must insert the constant if it doesn't already
11404 exist. */
11405 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11407 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11408 && fix->mode == mp->mode
11409 && (GET_CODE (fix->value) != CODE_LABEL
11410 || (CODE_LABEL_NUMBER (fix->value)
11411 == CODE_LABEL_NUMBER (mp->value)))
11412 && rtx_equal_p (fix->value, mp->value))
11414 /* More than one fix references this entry. */
11416 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11419 /* Note the insertion point if necessary. */
11421 && mp->max_address > max_address)
11424 /* If we are inserting an 8-byte aligned quantity and
11425 we have not already found an insertion point, then
11426 make sure that all such 8-byte aligned quantities are
11427 placed at the start of the pool. */
11428 if (ARM_DOUBLEWORD_ALIGN
11430 && fix->fix_size >= 8
11431 && mp->fix_size < 8)
11434 max_address = mp->max_address;
11438 /* The value is not currently in the minipool, so we need to create
11439 a new entry for it. If MAX_MP is NULL, the entry will be put on
11440 the end of the list since the placement is less constrained than
11441 any existing entry. Otherwise, we insert the new fix before
11442 MAX_MP and, if necessary, adjust the constraints on the other
11443 entries. */
11444 mp = XNEW (Mnode);
11445 mp->fix_size = fix->fix_size;
11446 mp->mode = fix->mode;
11447 mp->value = fix->value;
11449 /* Not yet required for a backwards ref. */
11450 mp->min_address = -65536;
11452 if (max_mp == NULL)
11454 mp->max_address = max_address;
11456 mp->prev = minipool_vector_tail;
11458 if (mp->prev == NULL)
11460 minipool_vector_head = mp;
11461 minipool_vector_label = gen_label_rtx ();
11464 mp->prev->next = mp;
11466 minipool_vector_tail = mp;
11470 if (max_address > max_mp->max_address - mp->fix_size)
11471 mp->max_address = max_mp->max_address - mp->fix_size;
11473 mp->max_address = max_address;
11476 mp->prev = max_mp->prev;
11478 if (mp->prev != NULL)
11479 mp->prev->next = mp;
11481 minipool_vector_head = mp;
11484 /* Save the new entry. */
11487 /* Scan over the preceding entries and adjust their addresses as
11488 required. */
11489 while (mp->prev != NULL
11490 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11492 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11500 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11501 HOST_WIDE_INT min_address)
11503 HOST_WIDE_INT offset;
11505 /* The code below assumes these are different. */
11506 gcc_assert (mp != min_mp);
11508 if (min_mp == NULL)
11510 if (min_address > mp->min_address)
11511 mp->min_address = min_address;
11515 /* We will adjust this below if it is too loose. */
11516 mp->min_address = min_address;
11518 /* Unlink MP from its current position. Since min_mp is non-null,
11519 mp->next must be non-null. */
11520 mp->next->prev = mp->prev;
11521 if (mp->prev != NULL)
11522 mp->prev->next = mp->next;
11524 minipool_vector_head = mp->next;
11526 /* Reinsert it after MIN_MP. */
11528 mp->next = min_mp->next;
11530 if (mp->next != NULL)
11531 mp->next->prev = mp;
11533 minipool_vector_tail = mp;
11539 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11541 mp->offset = offset;
11542 if (mp->refcount > 0)
11543 offset += mp->fix_size;
11545 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11546 mp->next->min_address = mp->min_address + mp->fix_size;
11552 /* Add a constant to the minipool for a backward reference. Returns the
11553 node added or NULL if the constant will not fit in this pool.
11555 Note that the code for insertion for a backwards reference can be
11556 somewhat confusing because the calculated offsets for each fix do
11557 not take into account the size of the pool (which is still under
11558 construction.) */
11559 static Mnode *
11560 add_minipool_backward_ref (Mfix *fix)
11562 /* If set, min_mp is the last pool_entry that has a lower constraint
11563 than the one we are trying to add. */
11564 Mnode *min_mp = NULL;
11565 /* This can be negative, since it is only a constraint. */
11566 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11569 /* If we can't reach the current pool from this insn, or if we can't
11570 insert this entry at the end of the pool without pushing other
11571 fixes out of range, then we don't try. This ensures that we
11572 can't fail later on. */
11573 if (min_address >= minipool_barrier->address
11574 || (minipool_vector_tail->min_address + fix->fix_size
11575 >= minipool_barrier->address))
11578 /* Scan the pool to see if a constant with the same value has
11579 already been added. While we are doing this, also note the
11580 location where we must insert the constant if it doesn't already
11582 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11584 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11585 && fix->mode == mp->mode
11586 && (GET_CODE (fix->value) != CODE_LABEL
11587 || (CODE_LABEL_NUMBER (fix->value)
11588 == CODE_LABEL_NUMBER (mp->value)))
11589 && rtx_equal_p (fix->value, mp->value)
11590 /* Check that there is enough slack to move this entry to the
11591 end of the table (this is conservative). */
11592 && (mp->max_address
11593 > (minipool_barrier->address
11594 + minipool_vector_tail->offset
11595 + minipool_vector_tail->fix_size)))
11598 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11601 if (min_mp != NULL)
11602 mp->min_address += fix->fix_size;
11605 /* Note the insertion point if necessary. */
11606 if (mp->min_address < min_address)
11608 /* For now, we do not allow the insertion of nodes requiring 8-byte
11609 alignment anywhere but at the start of the pool. */
11610 if (ARM_DOUBLEWORD_ALIGN
11611 && fix->fix_size >= 8 && mp->fix_size < 8)
11616 else if (mp->max_address
11617 < minipool_barrier->address + mp->offset + fix->fix_size)
11619 /* Inserting before this entry would push the fix beyond
11620 its maximum address (which can happen if we have
11621 re-located a forwards fix); force the new fix to come
11622 after it. */
11623 if (ARM_DOUBLEWORD_ALIGN
11624 && fix->fix_size >= 8 && mp->fix_size < 8)
11629 min_address = mp->min_address + fix->fix_size;
11632 /* Do not insert a non-8-byte aligned quantity before 8-byte
11633 aligned quantities. */
11634 else if (ARM_DOUBLEWORD_ALIGN
11635 && fix->fix_size < 8
11636 && mp->fix_size >= 8)
11639 min_address = mp->min_address + fix->fix_size;
11644 /* We need to create a new entry. */
11645 mp = XNEW (Mnode);
11646 mp->fix_size = fix->fix_size;
11647 mp->mode = fix->mode;
11648 mp->value = fix->value;
11650 mp->max_address = minipool_barrier->address + 65536;
11652 mp->min_address = min_address;
11654 if (min_mp == NULL)
11657 mp->next = minipool_vector_head;
11659 if (mp->next == NULL)
11661 minipool_vector_tail = mp;
11662 minipool_vector_label = gen_label_rtx ();
11665 mp->next->prev = mp;
11667 minipool_vector_head = mp;
11671 mp->next = min_mp->next;
11675 if (mp->next != NULL)
11676 mp->next->prev = mp;
11678 minipool_vector_tail = mp;
11681 /* Save the new entry. */
11689 /* Scan over the following entries and adjust their offsets. */
11690 while (mp->next != NULL)
11692 if (mp->next->min_address < mp->min_address + mp->fix_size)
11693 mp->next->min_address = mp->min_address + mp->fix_size;
11695 if (mp->refcount)
11696 mp->next->offset = mp->offset + mp->fix_size;
11697 else
11698 mp->next->offset = mp->offset;
11707 assign_minipool_offsets (Mfix *barrier)
11709 HOST_WIDE_INT offset = 0;
11712 minipool_barrier = barrier;
11714 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11716 mp->offset = offset;
11718 if (mp->refcount > 0)
11719 offset += mp->fix_size;
11723 /* Output the literal table. */
11725 dump_minipool (rtx scan)
11731 if (ARM_DOUBLEWORD_ALIGN)
11732 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11733 if (mp->refcount > 0 && mp->fix_size >= 8)
11734 {
11735 align64 = 1;
11736 break;
11737 }
11739 if (dump_file)
11740 fprintf (dump_file,
11741 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11742 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11744 scan = emit_label_after (gen_label_rtx (), scan);
11745 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11746 scan = emit_label_after (minipool_vector_label, scan);
11748 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11750 if (mp->refcount > 0)
11754 fprintf (dump_file,
11755 ";; Offset %u, min %ld, max %ld ",
11756 (unsigned) mp->offset, (unsigned long) mp->min_address,
11757 (unsigned long) mp->max_address);
11758 arm_print_value (dump_file, mp->value);
11759 fputc ('\n', dump_file);
11762 switch (mp->fix_size)
11764 #ifdef HAVE_consttable_1
11765 case 1:
11766 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11767 break;
11768 #endif
11770 #ifdef HAVE_consttable_2
11771 case 2:
11772 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11773 break;
11774 #endif
11776 #ifdef HAVE_consttable_4
11777 case 4:
11778 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11779 break;
11780 #endif
11782 #ifdef HAVE_consttable_8
11783 case 8:
11784 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11785 break;
11786 #endif
11788 #ifdef HAVE_consttable_16
11789 case 16:
11790 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11791 break;
11792 #endif
11794 default:
11795 gcc_unreachable ();
11803 minipool_vector_head = minipool_vector_tail = NULL;
11804 scan = emit_insn_after (gen_consttable_end (), scan);
11805 scan = emit_barrier_after (scan);
11808 /* Return the cost of forcibly inserting a barrier after INSN. */
11810 arm_barrier_cost (rtx insn)
11812 /* Basing the location of the pool on the loop depth is preferable,
11813 but at the moment, the basic block information seems to be
11814 corrupt by this stage of the compilation. */
11815 int base_cost = 50;
11816 rtx next = next_nonnote_insn (insn);
11818 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11819 base_cost -= 20;
11821 switch (GET_CODE (insn))
11822 {
11823 case CODE_LABEL:
11824 /* It will always be better to place the table before the label, rather
11825 than after it. */
11826 return 50;
11828 case INSN:
11829 case CALL_INSN:
11830 return base_cost;
11832 case JUMP_INSN:
11833 return base_cost - 10;
11835 default:
11836 return base_cost + 10;
11840 /* Find the best place in the insn stream in the range
11841 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11842 Create the barrier by inserting a jump and add a new fix entry for
11845 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11847 HOST_WIDE_INT count = 0;
11849 rtx from = fix->insn;
11850 /* The instruction after which we will insert the jump. */
11851 rtx selected = NULL;
11853 /* The address at which the jump instruction will be placed. */
11854 HOST_WIDE_INT selected_address;
11856 HOST_WIDE_INT max_count = max_address - fix->address;
11857 rtx label = gen_label_rtx ();
11859 selected_cost = arm_barrier_cost (from);
11860 selected_address = fix->address;
11862 while (from && count < max_count)
11867 /* This code shouldn't have been called if there was a natural barrier
11868 within range. */
11869 gcc_assert (GET_CODE (from) != BARRIER);
11871 /* Count the length of this insn. */
11872 count += get_attr_length (from);
11874 /* If there is a jump table, add its length. */
11875 tmp = is_jump_table (from);
11878 count += get_jump_table_size (tmp);
11880 /* Jump tables aren't in a basic block, so base the cost on
11881 the dispatch insn. If we select this location, we will
11882 still put the pool after the table. */
11883 new_cost = arm_barrier_cost (from);
11885 if (count < max_count
11886 && (!selected || new_cost <= selected_cost))
11889 selected_cost = new_cost;
11890 selected_address = fix->address + count;
11893 /* Continue after the dispatch table. */
11894 from = NEXT_INSN (tmp);
11898 new_cost = arm_barrier_cost (from);
11900 if (count < max_count
11901 && (!selected || new_cost <= selected_cost))
11904 selected_cost = new_cost;
11905 selected_address = fix->address + count;
11908 from = NEXT_INSN (from);
11911 /* Make sure that we found a place to insert the jump. */
11912 gcc_assert (selected);
11914 /* Create a new JUMP_INSN that branches around a barrier. */
11915 from = emit_jump_insn_after (gen_jump (label), selected);
11916 JUMP_LABEL (from) = label;
11917 barrier = emit_barrier_after (from);
11918 emit_label_after (label, barrier);
11920 /* Create a minipool barrier entry for the new barrier. */
11921 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11922 new_fix->insn = barrier;
11923 new_fix->address = selected_address;
11924 new_fix->next = fix->next;
11925 fix->next = new_fix;
11930 /* Record that there is a natural barrier in the insn stream at
11931 ADDRESS. */
11932 static void
11933 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11935 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11938 fix->address = address;
11941 if (minipool_fix_head != NULL)
11942 minipool_fix_tail->next = fix;
11944 minipool_fix_head = fix;
11946 minipool_fix_tail = fix;
11949 /* Record INSN, which will need fixing up to load a value from the
11950 minipool. ADDRESS is the offset of the insn since the start of the
11951 function; LOC is a pointer to the part of the insn which requires
11952 fixing; VALUE is the constant that must be loaded, which is of type
11953 MODE. */
11954 static void
11955 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11956 enum machine_mode mode, rtx value)
11958 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11961 fix->address = address;
11964 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11965 fix->value = value;
11966 fix->forwards = get_attr_pool_range (insn);
11967 fix->backwards = get_attr_neg_pool_range (insn);
11968 fix->minipool = NULL;
11970 /* If an insn doesn't have a range defined for it, then it isn't
11971 expecting to be reworked by this code. Better to stop now than
11972 to generate duff assembly code. */
11973 gcc_assert (fix->forwards || fix->backwards);
11975 /* If an entry requires 8-byte alignment then assume all constant pools
11976 require 4 bytes of padding. Trying to do this later on a per-pool
11977 basis is awkward because existing pool entries have to be modified. */
11978 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11979 minipool_pad = 4;
11981 if (dump_file)
11983 fprintf (dump_file,
11984 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11985 GET_MODE_NAME (mode),
11986 INSN_UID (insn), (unsigned long) address,
11987 -1 * (long)fix->backwards, (long)fix->forwards);
11988 arm_print_value (dump_file, fix->value);
11989 fprintf (dump_file, "\n");
11992 /* Add it to the chain of fixes. */
11995 if (minipool_fix_head != NULL)
11996 minipool_fix_tail->next = fix;
11998 minipool_fix_head = fix;
12000 minipool_fix_tail = fix;
12003 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12004 Returns the number of insns needed, or 99 if we don't know how to
12005 do it. */
12006 int
12007 arm_const_double_inline_cost (rtx val)
12009 rtx lowpart, highpart;
12010 enum machine_mode mode;
12012 mode = GET_MODE (val);
12014 if (mode == VOIDmode)
12015 mode = DImode;
12017 gcc_assert (GET_MODE_SIZE (mode) == 8);
12019 lowpart = gen_lowpart (SImode, val);
12020 highpart = gen_highpart_mode (SImode, mode, val);
12022 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12023 gcc_assert (GET_CODE (highpart) == CONST_INT);
12025 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12026 NULL_RTX, NULL_RTX, 0, 0)
12027 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12028 NULL_RTX, NULL_RTX, 0, 0));
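/* As a worked example (with an illustrative value): for the DImode
   constant 0x0000ff000000ff00 each 32-bit half is 0x0000ff00, a valid
   rotated 8-bit immediate, so the sum above comes to 1 + 1 = 2. */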
12031 /* Return true if it is worthwhile to split a 64-bit constant into two
12032 32-bit operations. This is the case if optimizing for size, or
12033 if we have load delay slots, or if one 32-bit part can be done with
12034 a single data operation. */
12036 arm_const_double_by_parts (rtx val)
12038 enum machine_mode mode = GET_MODE (val);
12041 if (optimize_size || arm_ld_sched)
12042 return true;
12044 if (mode == VOIDmode)
12045 mode = DImode;
12047 part = gen_highpart_mode (SImode, mode, val);
12049 gcc_assert (GET_CODE (part) == CONST_INT);
12051 if (const_ok_for_arm (INTVAL (part))
12052 || const_ok_for_arm (~INTVAL (part)))
12053 return true;
12055 part = gen_lowpart (SImode, val);
12057 gcc_assert (GET_CODE (part) == CONST_INT);
12059 if (const_ok_for_arm (INTVAL (part))
12060 || const_ok_for_arm (~INTVAL (part)))
12061 return true;
12063 return false;
12066 /* Return true if it is possible to inline both the high and low parts
12067 of a 64-bit constant into 32-bit data processing instructions. */
12069 arm_const_double_by_immediates (rtx val)
12071 enum machine_mode mode = GET_MODE (val);
12074 if (mode == VOIDmode)
12075 mode = DImode;
12077 part = gen_highpart_mode (SImode, mode, val);
12079 gcc_assert (GET_CODE (part) == CONST_INT);
12081 if (!const_ok_for_arm (INTVAL (part)))
12082 return false;
12084 part = gen_lowpart (SImode, val);
12086 gcc_assert (GET_CODE (part) == CONST_INT);
12088 if (!const_ok_for_arm (INTVAL (part)))
12089 return false;
12091 return true;
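/* For instance (illustrative values): 0x000000ff000000ff passes this
   test, since both halves are the valid immediate 0xff, whereas
   0x0000010100000101 fails because 0x101 spans nine bits and cannot
   be encoded as a rotated 8-bit immediate. */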
12094 /* Scan INSN and note any of its operands that need fixing.
12095 If DO_PUSHES is false we do not actually push any of the fixups
12096 needed. The function returns TRUE if any fixups were needed/pushed.
12097 This is used by arm_memory_load_p() which needs to know about loads
12098 of constants that will be converted into minipool loads. */
12100 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12102 bool result = false;
12105 extract_insn (insn);
12107 if (!constrain_operands (1))
12108 fatal_insn_not_found (insn);
12110 if (recog_data.n_alternatives == 0)
12113 /* Fill in recog_op_alt with information about the constraints of
12114 this insn. */
12115 preprocess_constraints ();
12117 for (opno = 0; opno < recog_data.n_operands; opno++)
12119 /* Things we need to fix can only occur in inputs. */
12120 if (recog_data.operand_type[opno] != OP_IN)
12123 /* If this alternative is a memory reference, then any mention
12124 of constants in this alternative is really to fool reload
12125 into allowing us to accept one there. We need to fix them up
12126 now so that we output the right code. */
12127 if (recog_op_alt[opno][which_alternative].memory_ok)
12129 rtx op = recog_data.operand[opno];
12131 if (CONSTANT_P (op))
12132 {
12133 if (do_pushes)
12134 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12135 recog_data.operand_mode[opno], op);
12136 result = true;
12137 }
12138 else if (GET_CODE (op) == MEM
12139 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12140 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12144 rtx cop = avoid_constant_pool_reference (op);
12146 /* Casting the address of something to a mode narrower
12147 than a word can cause avoid_constant_pool_reference()
12148 to return the pool reference itself. That's no good to
12149 us here. Let's just hope that we can use the
12150 constant pool value directly. */
12151 if (op == cop)
12152 cop = get_pool_constant (XEXP (op, 0));
12154 push_minipool_fix (insn, address,
12155 recog_data.operand_loc[opno],
12156 recog_data.operand_mode[opno], cop);
12167 /* Convert instructions to their cc-clobbering variant if possible, since
12168 that allows us to use smaller encodings. */
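/* Schematically (an illustrative sketch of the rewrite done below),
   when the condition codes are dead after the insn we turn

	(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

   into

	(parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
	           (clobber (reg:CC CC_REGNUM))])

   so that the flag-setting 16-bit ADDS encoding can be selected. */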
12171 thumb2_reorg (void)
12176 INIT_REG_SET (&live);
12178 /* We are freeing block_for_insn in the toplev to keep compatibility
12179 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12180 compute_bb_for_insn ();
12186 COPY_REG_SET (&live, DF_LR_OUT (bb));
12187 df_simulate_initialize_backwards (bb, &live);
12188 FOR_BB_INSNS_REVERSE (bb, insn)
12190 if (NONJUMP_INSN_P (insn)
12191 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12193 rtx pat = PATTERN (insn);
12194 if (GET_CODE (pat) == SET
12195 && low_register_operand (XEXP (pat, 0), SImode)
12196 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12197 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12198 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12200 rtx dst = XEXP (pat, 0);
12201 rtx src = XEXP (pat, 1);
12202 rtx op0 = XEXP (src, 0);
12203 if (rtx_equal_p (dst, op0)
12204 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12206 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12207 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12208 rtvec vec = gen_rtvec (2, pat, clobber);
12209 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12210 INSN_CODE (insn) = -1;
12214 if (NONDEBUG_INSN_P (insn))
12215 df_simulate_one_insn_backwards (bb, insn, &live);
12218 CLEAR_REG_SET (&live);
12221 /* GCC puts the pool in the wrong place for ARM, since we can only
12222 load addresses a limited distance around the pc. We do some
12223 special munging to move the constant pool values to the correct
12224 point in the code. */
12229 HOST_WIDE_INT address = 0;
12235 minipool_fix_head = minipool_fix_tail = NULL;
12237 /* The first insn must always be a note, or the code below won't
12238 scan it properly. */
12239 insn = get_insns ();
12240 gcc_assert (GET_CODE (insn) == NOTE);
12243 /* Scan all the insns and record the operands that will need fixing. */
12244 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12246 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12247 && (arm_cirrus_insn_p (insn)
12248 || GET_CODE (insn) == JUMP_INSN
12249 || arm_memory_load_p (insn)))
12250 cirrus_reorg (insn);
12252 if (GET_CODE (insn) == BARRIER)
12253 push_minipool_barrier (insn, address);
12254 else if (INSN_P (insn))
12258 note_invalid_constants (insn, address, true);
12259 address += get_attr_length (insn);
12261 /* If the insn is a vector jump, add the size of the table
12262 and skip the table. */
12263 if ((table = is_jump_table (insn)) != NULL)
12265 address += get_jump_table_size (table);
12266 insn = table;
12271 fix = minipool_fix_head;
12273 /* Now scan the fixups and perform the required changes. */
12278 Mfix * last_added_fix;
12279 Mfix * last_barrier = NULL;
12282 /* Skip any further barriers before the next fix. */
12283 while (fix && GET_CODE (fix->insn) == BARRIER)
12284 fix = fix->next;
12286 /* No more fixes. */
12287 if (fix == NULL)
12288 break;
12290 last_added_fix = NULL;
12292 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12294 if (GET_CODE (ftmp->insn) == BARRIER)
12296 if (ftmp->address >= minipool_vector_head->max_address)
12297 break;
12299 last_barrier = ftmp;
12301 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12302 break;
12304 last_added_fix = ftmp; /* Keep track of the last fix added. */
12307 /* If we found a barrier, drop back to that; any fixes that we
12308 could have reached but come after the barrier will now go in
12309 the next mini-pool. */
12310 if (last_barrier != NULL)
12312 /* Reduce the refcount for those fixes that won't go into this
12313 pool after all. */
12314 for (fdel = last_barrier->next;
12315 fdel && fdel != ftmp;
12316 fdel = fdel->next)
12318 fdel->minipool->refcount--;
12319 fdel->minipool = NULL;
12322 ftmp = last_barrier;
12326 /* ftmp is the first fix that we can't fit into this pool and
12327 there are no natural barriers that we could use. Insert a
12328 new barrier in the code somewhere between the previous
12329 fix and this one, and arrange to jump around it. */
12330 HOST_WIDE_INT max_address;
12332 /* The last item on the list of fixes must be a barrier, so
12333 we can never run off the end of the list of fixes without
12334 last_barrier being set. */
12335 gcc_assert (ftmp);
12337 max_address = minipool_vector_head->max_address;
12338 /* Check that there isn't another fix that is in range that
12339 we couldn't fit into this pool because the pool was
12340 already too large: we need to put the pool before such an
12341 instruction. The pool itself may come just after the
12342 fix because create_fix_barrier also allows space for a
12343 jump instruction. */
12344 if (ftmp->address < max_address)
12345 max_address = ftmp->address + 1;
12347 last_barrier = create_fix_barrier (last_added_fix, max_address);
12350 assign_minipool_offsets (last_barrier);
12354 if (GET_CODE (ftmp->insn) != BARRIER
12355 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12356 == NULL))
12357 break;
12359 ftmp = ftmp->next;
12362 /* Scan over the fixes we have identified for this pool, fixing them
12363 up and adding the constants to the pool itself. */
12364 for (this_fix = fix; this_fix && ftmp != this_fix;
12365 this_fix = this_fix->next)
12366 if (GET_CODE (this_fix->insn) != BARRIER)
12368 rtx addr
12369 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12370 minipool_vector_label),
12371 this_fix->minipool->offset);
12372 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12375 dump_minipool (last_barrier->insn);
12379 /* From now on we must synthesize any constants that we can't handle
12380 directly. This can happen if the RTL gets split during final
12381 instruction generation. */
12382 after_arm_reorg = 1;
12384 /* Free the minipool memory. */
12385 obstack_free (&minipool_obstack, minipool_startobj);
12388 /* Routines to output assembly language. */
12390 /* If the rtx is the correct value then return the string of the number.
12391 In this way we can ensure that valid double constants are generated even
12392 when cross compiling. */
12394 fp_immediate_constant (rtx x)
12399 if (!fp_consts_inited)
12400 init_fp_table ();
12402 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12403 for (i = 0; i < 8; i++)
12404 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12405 return strings_fp[i];
12407 gcc_unreachable ();
12410 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12411 static const char *
12412 fp_const_from_val (REAL_VALUE_TYPE *r)
12416 if (!fp_consts_inited)
12417 init_fp_table ();
12419 for (i = 0; i < 8; i++)
12420 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12421 return strings_fp[i];
12423 gcc_unreachable ();
12426 /* Output the operands of a LDM/STM instruction to STREAM.
12427 MASK is the ARM register set mask of which only bits 0-15 are important.
12428 REG is the base register, either the frame pointer or the stack pointer.
12429 INSTR is the possibly suffixed load or store instruction.
12430 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12433 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12434 unsigned long mask, int rfe)
12437 bool not_first = FALSE;
12439 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12440 fputc ('\t', stream);
12441 asm_fprintf (stream, instr, reg);
12442 fputc ('{', stream);
12444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12445 if (mask & (1 << i))
12446 {
12447 if (not_first)
12448 fprintf (stream, ", ");
12450 asm_fprintf (stream, "%r", i);
12451 not_first = TRUE;
12452 }
12454 if (rfe)
12455 fprintf (stream, "}^\n");
12456 else
12457 fprintf (stream, "}\n");
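/* For example (an illustrative call): INSTR "ldmfd\t%r!, " with REG
   being the stack pointer and MASK covering r4, r5 and pc prints

	ldmfd	sp!, {r4, r5, pc}

   and a nonzero RFE appends '^' to the closing brace. */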
12461 /* Output a FLDMD instruction to STREAM.
12462 BASE is the register containing the address.
12463 REG and COUNT specify the register range.
12464 Extra registers may be added to avoid hardware bugs.
12466 We output FLDMD even for ARMv5 VFP implementations. Although
12467 FLDMD is technically not supported until ARMv6, it is believed
12468 that all VFP implementations support its use in this context. */
12471 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12475 /* Workaround ARM10 VFPr1 bug. */
12476 if (count == 2 && !arm_arch6)
12483 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12484 load into multiple parts if we have to handle more than 16 registers. */
12487 vfp_output_fldmd (stream, base, reg, 16);
12488 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12492 fputc ('\t', stream);
12493 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12495 for (i = reg; i < reg + count; i++)
12498 fputs (", ", stream);
12499 asm_fprintf (stream, "d%d", i);
12501 fputs ("}\n", stream);
12506 /* Output the assembly for a store multiple. */
12509 vfp_output_fstmd (rtx * operands)
12516 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12517 p = strlen (pattern);
12519 gcc_assert (GET_CODE (operands[1]) == REG);
12521 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12522 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12524 p += sprintf (&pattern[p], ", d%d", base + i);
12526 strcpy (&pattern[p], "}");
12528 output_asm_insn (pattern, operands);
12533 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12534 number of bytes pushed. */
12537 vfp_emit_fstmd (int base_reg, int count)
12544 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12545 register pairs are stored by a store multiple insn. We avoid this
12546 by pushing an extra pair. */
12547 if (count == 2 && !arm_arch6)
12549 if (base_reg == LAST_VFP_REGNUM - 3)
12554 /* FSTMD may not store more than 16 doubleword registers at once. Split
12555 larger stores into multiple parts (up to a maximum of two, in
12556 practice). */
12557 if (count > 16)
12560 /* NOTE: base_reg is an internal register number, so each D register
12561 counts as 2. */
12562 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12563 saved += vfp_emit_fstmd (base_reg, 16);
12567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12568 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12570 reg = gen_rtx_REG (DFmode, base_reg);
12573 XVECEXP (par, 0, 0)
12574 = gen_rtx_SET (VOIDmode,
12577 gen_rtx_PRE_MODIFY (Pmode,
12580 (stack_pointer_rtx,
12583 gen_rtx_UNSPEC (BLKmode,
12584 gen_rtvec (1, reg),
12585 UNSPEC_PUSH_MULT));
12587 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12588 plus_constant (stack_pointer_rtx, -(count * 8)));
12589 RTX_FRAME_RELATED_P (tmp) = 1;
12590 XVECEXP (dwarf, 0, 0) = tmp;
12592 tmp = gen_rtx_SET (VOIDmode,
12593 gen_frame_mem (DFmode, stack_pointer_rtx),
12595 RTX_FRAME_RELATED_P (tmp) = 1;
12596 XVECEXP (dwarf, 0, 1) = tmp;
12598 for (i = 1; i < count; i++)
12600 reg = gen_rtx_REG (DFmode, base_reg);
12602 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12604 tmp = gen_rtx_SET (VOIDmode,
12605 gen_frame_mem (DFmode,
12606 plus_constant (stack_pointer_rtx,
12609 RTX_FRAME_RELATED_P (tmp) = 1;
12610 XVECEXP (dwarf, 0, i + 1) = tmp;
12613 par = emit_insn (par);
12614 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12615 RTX_FRAME_RELATED_P (par) = 1;
12620 /* Emit a call instruction with pattern PAT. ADDR is the address of
12621 the call target. */
12624 arm_emit_call_insn (rtx pat, rtx addr)
12628 insn = emit_call_insn (pat);
12630 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12631 If the call might use such an entry, add a use of the PIC register
12632 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12633 if (TARGET_VXWORKS_RTP
12635 && GET_CODE (addr) == SYMBOL_REF
12636 && (SYMBOL_REF_DECL (addr)
12637 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12638 : !SYMBOL_REF_LOCAL_P (addr)))
12640 require_pic_register ();
12641 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12645 /* Output a 'call' insn. */
12646 const char *
12647 output_call (rtx *operands)
12649 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12651 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12652 if (REGNO (operands[0]) == LR_REGNUM)
12654 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12655 output_asm_insn ("mov%?\t%0, %|lr", operands);
12658 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12660 if (TARGET_INTERWORK || arm_arch4t)
12661 output_asm_insn ("bx%?\t%0", operands);
12662 else
12663 output_asm_insn ("mov%?\t%|pc, %0", operands);
12668 /* Output a 'call' insn that is a reference in memory. This is
12669 disabled for ARMv5; a blx is preferred instead because otherwise
12670 there's a significant performance overhead. */
12671 const char *
12672 output_call_mem (rtx *operands)
12674 gcc_assert (!arm_arch5);
12675 if (TARGET_INTERWORK)
12677 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12678 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12679 output_asm_insn ("bx%?\t%|ip", operands);
12681 else if (regno_use_in (LR_REGNUM, operands[0]))
12683 /* LR is used in the memory address. We load the address in the
12684 first instruction. It's safe to use IP as the target of the
12685 load since the call will kill it anyway. */
12686 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12687 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12688 if (arm_arch4t)
12689 output_asm_insn ("bx%?\t%|ip", operands);
12690 else
12691 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12695 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12696 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12703 /* Output a move from arm registers to an fpa register.
12704 OPERANDS[0] is an fpa register.
12705 OPERANDS[1] is the first register of an arm register pair. */
12707 output_mov_long_double_fpa_from_arm (rtx *operands)
12709 int arm_reg0 = REGNO (operands[1]);
12712 gcc_assert (arm_reg0 != IP_REGNUM);
12714 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12715 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12716 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12718 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12719 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12724 /* Output a move from an fpa register to arm registers.
12725 OPERANDS[0] is the first register of an arm register pair.
12726 OPERANDS[1] is an fpa register. */
12728 output_mov_long_double_arm_from_fpa (rtx *operands)
12730 int arm_reg0 = REGNO (operands[0]);
12733 gcc_assert (arm_reg0 != IP_REGNUM);
12735 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12736 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12737 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12739 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12740 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12744 /* Output a move from arm registers to arm registers of a long double.
12745 OPERANDS[0] is the destination.
12746 OPERANDS[1] is the source. */
12748 output_mov_long_double_arm_from_arm (rtx *operands)
12750 /* We have to be careful here because the two might overlap. */
12751 int dest_start = REGNO (operands[0]);
12752 int src_start = REGNO (operands[1]);
12756 if (dest_start < src_start)
12758 for (i = 0; i < 3; i++)
12760 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12761 ops[1] = gen_rtx_REG (SImode, src_start + i);
12762 output_asm_insn ("mov%?\t%0, %1", ops);
12767 for (i = 2; i >= 0; i--)
12769 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12770 ops[1] = gen_rtx_REG (SImode, src_start + i);
12771 output_asm_insn ("mov%?\t%0, %1", ops);
12779 arm_emit_movpair (rtx dest, rtx src)
12781 /* If the src is an immediate, simplify it. */
12782 if (CONST_INT_P (src))
12784 HOST_WIDE_INT val = INTVAL (src);
12785 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12786 if ((val >> 16) & 0x0000ffff)
12787 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12789 GEN_INT ((val >> 16) & 0x0000ffff));
12792 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12793 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
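/* For example (illustrative operands): for SRC = (const_int 0x12345678)
   the sequence above corresponds to the movw/movt pair

	movw	rD, #0x5678
	movt	rD, #0x1234

   where rD stands for DEST. */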
12796 /* Output a move from arm registers to an fpa register.
12797 OPERANDS[0] is an fpa register.
12798 OPERANDS[1] is the first register of an arm register pair. */
12800 output_mov_double_fpa_from_arm (rtx *operands)
12802 int arm_reg0 = REGNO (operands[1]);
12805 gcc_assert (arm_reg0 != IP_REGNUM);
12807 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12808 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12809 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12810 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12814 /* Output a move from an fpa register to arm registers.
12815 OPERANDS[0] is the first register of an arm register pair.
12816 OPERANDS[1] is an fpa register. */
12818 output_mov_double_arm_from_fpa (rtx *operands)
12820 int arm_reg0 = REGNO (operands[0]);
12823 gcc_assert (arm_reg0 != IP_REGNUM);
12825 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12826 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12827 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12828 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12832 /* Output a move between double words.
12833 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12834 or MEM<-REG and all MEMs must be offsettable addresses. */
12836 output_move_double (rtx *operands)
12838 enum rtx_code code0 = GET_CODE (operands[0]);
12839 enum rtx_code code1 = GET_CODE (operands[1]);
12844 unsigned int reg0 = REGNO (operands[0]);
12846 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12848 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12850 switch (GET_CODE (XEXP (operands[1], 0)))
12854 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12855 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12857 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12861 gcc_assert (TARGET_LDRD);
12862 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12867 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12869 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12874 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12876 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12880 gcc_assert (TARGET_LDRD);
12881 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12886 /* Autoincrement addressing modes should never have overlapping
12887 base and destination registers, and overlapping index registers
12888 are already prohibited, so this doesn't need to worry about
12889 fix_cm3_ldrd. */
12890 otherops[0] = operands[0];
12891 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12892 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12894 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12896 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12898 /* Registers overlap so split out the increment. */
12899 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12900 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12904 /* Use a single insn if we can.
12905 FIXME: IWMMXT allows offsets larger than ldrd can
12906 handle, fix these up with a pair of ldr. */
12908 || GET_CODE (otherops[2]) != CONST_INT
12909 || (INTVAL (otherops[2]) > -256
12910 && INTVAL (otherops[2]) < 256))
12911 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12914 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12915 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12921 /* Use a single insn if we can.
12922 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12923 fix these up with a pair of ldr. */
12925 || GET_CODE (otherops[2]) != CONST_INT
12926 || (INTVAL (otherops[2]) > -256
12927 && INTVAL (otherops[2]) < 256))
12928 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12931 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12932 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12939 /* We might be able to use ldrd %0, %1 here. However the range is
12940 different to ldr/adr, and it is broken on some ARMv7-M
12941 implementations. */
12942 /* Use the second register of the pair to avoid problematic
12943 conditions. */
12944 otherops[1] = operands[1];
12945 output_asm_insn ("adr%?\t%0, %1", otherops);
12946 operands[1] = otherops[0];
12948 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12950 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12953 /* ??? This needs checking for thumb2. */
12955 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12956 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12958 otherops[0] = operands[0];
12959 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12960 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12962 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12964 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12966 switch ((int) INTVAL (otherops[2]))
12969 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12974 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12979 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12983 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12984 operands[1] = otherops[0];
12986 && (GET_CODE (otherops[2]) == REG
12988 || (GET_CODE (otherops[2]) == CONST_INT
12989 && INTVAL (otherops[2]) > -256
12990 && INTVAL (otherops[2]) < 256)))
12992 if (reg_overlap_mentioned_p (operands[0],
12996 /* Swap base and index registers over to
12997 avoid a conflict. */
12999 otherops[1] = otherops[2];
13002 /* If both registers conflict, it will usually
13003 have been fixed by a splitter. */
13004 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13005 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13007 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13008 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13012 otherops[0] = operands[0];
13013 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13018 if (GET_CODE (otherops[2]) == CONST_INT)
13020 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13021 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13023 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13026 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13029 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13032 return "ldr%(d%)\t%0, [%1]";
13034 return "ldm%(ia%)\t%1, %M0";
13038 otherops[1] = adjust_address (operands[1], SImode, 4);
13039 /* Take care of overlapping base/data reg. */
13040 if (reg_mentioned_p (operands[0], operands[1]))
13042 output_asm_insn ("ldr%?\t%0, %1", otherops);
13043 output_asm_insn ("ldr%?\t%0, %1", operands);
13047 output_asm_insn ("ldr%?\t%0, %1", operands);
13048 output_asm_insn ("ldr%?\t%0, %1", otherops);
13055 /* Constraints should ensure this. */
13056 gcc_assert (code0 == MEM && code1 == REG);
13057 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13059 switch (GET_CODE (XEXP (operands[0], 0)))
13063 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13065 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13069 gcc_assert (TARGET_LDRD);
13070 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13075 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13077 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13082 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13084 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13088 gcc_assert (TARGET_LDRD);
13089 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13094 otherops[0] = operands[1];
13095 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13096 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13098 /* IWMMXT allows offsets larger than ldrd can handle,
13099 fix these up with a pair of ldr. */
13101 && GET_CODE (otherops[2]) == CONST_INT
13102 && (INTVAL(otherops[2]) <= -256
13103 || INTVAL(otherops[2]) >= 256))
13105 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13107 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13108 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13112 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13113 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13116 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13117 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13119 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13123 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13124 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13126 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13129 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13135 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13141 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13146 && (GET_CODE (otherops[2]) == REG
13148 || (GET_CODE (otherops[2]) == CONST_INT
13149 && INTVAL (otherops[2]) > -256
13150 && INTVAL (otherops[2]) < 256)))
13152 otherops[0] = operands[1];
13153 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13154 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13160 otherops[0] = adjust_address (operands[0], SImode, 4);
13161 otherops[1] = operands[1];
13162 output_asm_insn ("str%?\t%1, %0", operands);
13163 output_asm_insn ("str%?\t%H1, %0", otherops);
13170 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13171 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13174 output_move_quad (rtx *operands)
13176 if (REG_P (operands[0]))
13178 /* Load, or reg->reg move. */
13180 if (MEM_P (operands[1]))
13182 switch (GET_CODE (XEXP (operands[1], 0)))
13185 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13190 output_asm_insn ("adr%?\t%0, %1", operands);
13191 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13195 gcc_unreachable ();
13203 gcc_assert (REG_P (operands[1]));
13205 dest = REGNO (operands[0]);
13206 src = REGNO (operands[1]);
13208 /* This seems pretty dumb, but hopefully GCC won't try to do it
13209 very often. */
13210 if (dest < src)
13211 for (i = 0; i < 4; i++)
13213 ops[0] = gen_rtx_REG (SImode, dest + i);
13214 ops[1] = gen_rtx_REG (SImode, src + i);
13215 output_asm_insn ("mov%?\t%0, %1", ops);
13217 else
13218 for (i = 3; i >= 0; i--)
13220 ops[0] = gen_rtx_REG (SImode, dest + i);
13221 ops[1] = gen_rtx_REG (SImode, src + i);
13222 output_asm_insn ("mov%?\t%0, %1", ops);
13228 gcc_assert (MEM_P (operands[0]));
13229 gcc_assert (REG_P (operands[1]));
13230 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13232 switch (GET_CODE (XEXP (operands[0], 0)))
13235 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13239 gcc_unreachable ();
13246 /* Output a VFP load or store instruction. */
13249 output_move_vfp (rtx *operands)
13251 rtx reg, mem, addr, ops[2];
13252 int load = REG_P (operands[0]);
13253 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13254 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13257 enum machine_mode mode;
13259 reg = operands[!load];
13260 mem = operands[load];
13262 mode = GET_MODE (reg);
13264 gcc_assert (REG_P (reg));
13265 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13266 gcc_assert (mode == SFmode
13270 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13271 gcc_assert (MEM_P (mem));
13273 addr = XEXP (mem, 0);
13275 switch (GET_CODE (addr))
13278 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13279 ops[0] = XEXP (addr, 0);
13284 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13285 ops[0] = XEXP (addr, 0);
13290 templ = "f%s%c%%?\t%%%s0, %%1%s";
13296 sprintf (buff, templ,
13297 load ? "ld" : "st",
13300 integer_p ? "\t%@ int" : "");
13301 output_asm_insn (buff, ops);
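/* For instance (illustrative operands): a DFmode load from a plain
   register address goes through the default template above and emits
   something like "fldd d8, [r0]", while an SFmode store to a POST_INC
   address uses the fstmias form. */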
13306 /* Output a Neon quad-word load or store, or a load or store for
13307 larger structure modes.
13309 WARNING: The ordering of elements is weird in big-endian mode,
13310 because we use VSTM, as required by the EABI. GCC RTL defines
13311 element ordering based on in-memory order. This can differ
13312 from the architectural ordering of elements within a NEON register.
13313 The intrinsics defined in arm_neon.h use the NEON register element
13314 ordering, not the GCC RTL element ordering.
13316 For example, the in-memory ordering of a big-endian quadword
13317 vector with 16-bit elements when stored from register pair {d0,d1}
13318 will be (lowest address first, d0[N] is NEON register element N):
13320 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13322 When necessary, quadword registers (dN, dN+1) are moved to ARM
13323 registers from rN in the order:
13325 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13327 So that STM/LDM can be used on vectors in ARM registers, and the
13328 same memory layout will result as if VSTM/VLDM were used. */
13331 output_move_neon (rtx *operands)
13333 rtx reg, mem, addr, ops[2];
13334 int regno, load = REG_P (operands[0]);
13337 enum machine_mode mode;
13339 reg = operands[!load];
13340 mem = operands[load];
13342 mode = GET_MODE (reg);
13344 gcc_assert (REG_P (reg));
13345 regno = REGNO (reg);
13346 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13347 || NEON_REGNO_OK_FOR_QUAD (regno));
13348 gcc_assert (VALID_NEON_DREG_MODE (mode)
13349 || VALID_NEON_QREG_MODE (mode)
13350 || VALID_NEON_STRUCT_MODE (mode));
13351 gcc_assert (MEM_P (mem));
13353 addr = XEXP (mem, 0);
13355 /* Strip off const from addresses like (const (plus (...))). */
13356 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13357 addr = XEXP (addr, 0);
13359 switch (GET_CODE (addr))
13362 templ = "v%smia%%?\t%%0!, %%h1";
13363 ops[0] = XEXP (addr, 0);
13368 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13369 templ = "v%smdb%%?\t%%0!, %%h1";
13370 ops[0] = XEXP (addr, 0);
13375 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13376 gcc_unreachable ();
13381 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13384 for (i = 0; i < nregs; i++)
13386 /* We're only using DImode here because it's a convenient size. */
13387 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13388 ops[1] = adjust_address (mem, DImode, 8 * i);
13389 if (reg_overlap_mentioned_p (ops[0], mem))
13391 gcc_assert (overlap == -1);
13396 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13397 output_asm_insn (buff, ops);
13402 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13403 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13404 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13405 output_asm_insn (buff, ops);
13412 templ = "v%smia%%?\t%%m0, %%h1";
13417 sprintf (buff, templ, load ? "ld" : "st");
13418 output_asm_insn (buff, ops);
13423 /* Compute and return the length of neon_mov<mode>, where <mode> is
13424 one of VSTRUCT modes: EI, OI, CI or XI. */
13426 arm_attr_length_move_neon (rtx insn)
13428 rtx reg, mem, addr;
13430 enum machine_mode mode;
13432 extract_insn_cached (insn);
13434 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13436 mode = GET_MODE (recog_data.operand[0]);
13447 gcc_unreachable ();
13451 load = REG_P (recog_data.operand[0]);
13452 reg = recog_data.operand[!load];
13453 mem = recog_data.operand[load];
13455 gcc_assert (MEM_P (mem));
13457 mode = GET_MODE (reg);
13458 addr = XEXP (mem, 0);
13460 /* Strip off const from addresses like (const (plus (...))). */
13461 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13462 addr = XEXP (addr, 0);
13464 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13466 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13473 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13474 return 0. */
13477 arm_address_offset_is_imm (rtx insn)
13481 extract_insn_cached (insn);
13483 if (REG_P (recog_data.operand[0]))
13486 mem = recog_data.operand[0];
13488 gcc_assert (MEM_P (mem));
13490 addr = XEXP (mem, 0);
13492 if (GET_CODE (addr) == REG
13493 || (GET_CODE (addr) == PLUS
13494 && GET_CODE (XEXP (addr, 0)) == REG
13495 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13501 /* Output an ADD r, s, #n where n may be too big for one instruction.
13502 If adding zero to one register, output nothing. */
13504 output_add_immediate (rtx *operands)
13506 HOST_WIDE_INT n = INTVAL (operands[2]);
  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
13523 /* Output a multiple immediate operation.
13524 OPERANDS is the vector of operands referred to in the output patterns.
13525 INSTR1 is the output pattern to use for the first constant.
13526 INSTR2 is the output pattern to use for subsequent constants.
13527 IMMED_OP is the index of the constant slot in OPERANDS.
13528 N is the constant value. */
13529 static const char *
13530 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13531 int immed_op, HOST_WIDE_INT n)
13533 #if HOST_BITS_PER_WIDE_INT > 32
13539 /* Quick and easy output. */
13540 operands[immed_op] = const0_rtx;
13541 output_asm_insn (instr1, operands);
13546 const char * instr = instr1;
13548 /* Note that n is never zero here (which would give no output). */
13549 for (i = 0; i < 32; i += 2)
13553 operands[immed_op] = GEN_INT (n & (255 << i));
13554 output_asm_insn (instr, operands);
13564 /* Return the name of a shifter operation. */
13565 static const char *
arm_shift_nmem (enum rtx_code code)
13571 return ARM_LSL_NAME;
13587 /* Return the appropriate ARM instruction for the operation code.
13588 The returned result should not be overwritten. OP is the rtx of the
13589 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13592 arithmetic_instr (rtx op, int shift_first_arg)
13594 switch (GET_CODE (op))
13600 return shift_first_arg ? "rsb" : "sub";
      return arm_shift_nmem (GET_CODE (op));
13618 gcc_unreachable ();
13622 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13623 for the operation code. The returned result should not be overwritten.
13624 OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant shift amount otherwise.  */
13627 static const char *
13628 shift_op (rtx op, HOST_WIDE_INT *amountp)
13631 enum rtx_code code = GET_CODE (op);
13633 switch (GET_CODE (XEXP (op, 1)))
13641 *amountp = INTVAL (XEXP (op, 1));
13645 gcc_unreachable ();
13651 gcc_assert (*amountp != -1);
13652 *amountp = 32 - *amountp;
13655 /* Fall through. */
      mnem = arm_shift_nmem (code);
13665 /* We never have to worry about the amount being other than a
13666 power of 2, since this case can never be reloaded from a reg. */
13667 gcc_assert (*amountp != -1);
13668 *amountp = int_log2 (*amountp);
13669 return ARM_LSL_NAME;
13672 gcc_unreachable ();
13675 if (*amountp != -1)
13677 /* This is not 100% correct, but follows from the desire to merge
13678 multiplication by a power of 2 with the recognizer for a
13679 shift. >=32 is not a valid shift for "lsl", so we must try and
13680 output a shift that produces the correct arithmetical result.
13681 Using lsr #32 is identical except for the fact that the carry bit
13682 is not set correctly if we set the flags; but we never use the
13683 carry bit from such an operation, so we can ignore that. */
13684 if (code == ROTATERT)
13685 /* Rotate is just modulo 32. */
13687 else if (*amountp != (*amountp & 31))
13689 if (code == ASHIFT)
13694 /* Shifts of 0 are no-ops. */
/* Obtain the shift count from POWER, which must be an exact power of two.  */
13704 static HOST_WIDE_INT
13705 int_log2 (HOST_WIDE_INT power)
13707 HOST_WIDE_INT shift = 0;
  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
13718 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13719 because /bin/as is horribly restrictive. The judgement about
13720 whether or not each character is 'printable' (and can be output as
13721 is) or not (and must be printed with an octal escape) must be made
13722 with reference to the *host* character set -- the situation is
13723 similar to that discussed in the comments above pp_c_char in
13724 c-pretty-print.c. */
13726 #define MAX_ASCII_LEN 51
13729 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13732 int len_so_far = 0;
13734 fputs ("\t.ascii\t\"", stream);
13736 for (i = 0; i < len; i++)
13740 if (len_so_far >= MAX_ASCII_LEN)
13742 fputs ("\"\n\t.ascii\t\"", stream);
13748 if (c == '\\' || c == '\"')
13750 putc ('\\', stream);
13758 fprintf (stream, "\\%03o", c);
  fputs ("\"\n", stream);
}
13766 /* Compute the register save mask for registers 0 through 12
13767 inclusive. This code is used by arm_compute_save_reg_mask. */
13769 static unsigned long
13770 arm_compute_save_reg0_reg12_mask (void)
13772 unsigned long func_type = arm_current_func_type ();
13773 unsigned long save_reg_mask = 0;
13776 if (IS_INTERRUPT (func_type))
13778 unsigned int max_reg;
13779 /* Interrupt functions must not corrupt any registers,
13780 even call clobbered ones. If this is a leaf function
13781 we can just examine the registers used by the RTL, but
13782 otherwise we have to assume that whatever function is
13783 called might clobber anything, and so we have to save
13784 all the call-clobbered registers as well. */
13785 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13786 /* FIQ handlers have registers r8 - r12 banked, so
	 we only need to check r0 - r7.  Normal ISRs only
	 bank r14 and r15, so we must check up to r12.
13789 r13 is the stack pointer which is always preserved,
13790 so we do not need to consider it here. */
13795 for (reg = 0; reg <= max_reg; reg++)
13796 if (df_regs_ever_live_p (reg)
13797 || (! current_function_is_leaf && call_used_regs[reg]))
13798 save_reg_mask |= (1 << reg);
13800 /* Also save the pic base register if necessary. */
13802 && !TARGET_SINGLE_PIC_BASE
13803 && arm_pic_register != INVALID_REGNUM
13804 && crtl->uses_pic_offset_table)
13805 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13807 else if (IS_VOLATILE(func_type))
13809 /* For noreturn functions we historically omitted register saves
13810 altogether. However this really messes up debugging. As a
13811 compromise save just the frame pointers. Combined with the link
13812 register saved elsewhere this should be sufficient to get
13814 if (frame_pointer_needed)
13815 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13816 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13817 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13818 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13819 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13823 /* In the normal case we only need to save those registers
13824 which are call saved and which are used by this function. */
13825 for (reg = 0; reg <= 11; reg++)
13826 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13827 save_reg_mask |= (1 << reg);
13829 /* Handle the frame pointer as a special case. */
13830 if (frame_pointer_needed)
13831 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13833 /* If we aren't loading the PIC register,
13834 don't stack it even though it may be live. */
13836 && !TARGET_SINGLE_PIC_BASE
13837 && arm_pic_register != INVALID_REGNUM
13838 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13839 || crtl->uses_pic_offset_table))
13840 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13842 /* The prologue will copy SP into R0, so save it. */
13843 if (IS_STACKALIGN (func_type))
13844 save_reg_mask |= 1;
13847 /* Save registers so the exception handler can modify them. */
13848 if (crtl->calls_eh_return)
13854 reg = EH_RETURN_DATA_REGNO (i);
13855 if (reg == INVALID_REGNUM)
13857 save_reg_mask |= 1 << reg;
  return save_reg_mask;
}
13865 /* Compute the number of bytes used to store the static chain register on the
13866 stack, above the stack frame. We need to know this accurately to get the
13867 alignment of the rest of the stack frame correct. */
static int
arm_compute_static_chain_stack_bytes (void)
13871 unsigned long func_type = arm_current_func_type ();
13872 int static_chain_stack_bytes = 0;
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (func_type)
      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13877 static_chain_stack_bytes = 4;
13879 return static_chain_stack_bytes;
13883 /* Compute a bit mask of which registers need to be
13884 saved on the stack for the current function.
13885 This is used by arm_get_frame_offsets, which may add extra registers. */
13887 static unsigned long
13888 arm_compute_save_reg_mask (void)
13890 unsigned int save_reg_mask = 0;
13891 unsigned long func_type = arm_current_func_type ();
13894 if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;
13898 /* If we are creating a stack frame, then we must save the frame pointer,
13899 IP (which will hold the old stack pointer), LR and the PC. */
13900 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13902 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13905 | (1 << PC_REGNUM);
13907 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13909 /* Decide if we need to save the link register.
13910 Interrupt routines have their own banked link register,
13911 so they never need to save it.
13912 Otherwise if we do not use the link register we do not need to save
13913 it. If we are pushing other registers onto the stack however, we
13914 can save an instruction in the epilogue by pushing the link register
13915 now and then popping it back into the PC. This incurs extra memory
13916 accesses though, so we only do it when optimizing for size, and only
13917 if we know that we will not need a fancy return sequence. */
13918 if (df_regs_ever_live_p (LR_REGNUM)
13921 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13922 && !crtl->calls_eh_return))
13923 save_reg_mask |= 1 << LR_REGNUM;
13925 if (cfun->machine->lr_save_eliminated)
13926 save_reg_mask &= ~ (1 << LR_REGNUM);
13928 if (TARGET_REALLY_IWMMXT
13929 && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size
			   + arm_compute_static_chain_stack_bytes ())
13934 /* The total number of registers that are going to be pushed
13935 onto the stack is odd. We need to ensure that the stack
13936 is 64-bit aligned before we start to save iWMMXt registers,
13937 and also before we start to create locals. (A local variable
13938 might be a double or long long which we will load/store using
13939 an iWMMXt instruction). Therefore we need to push another
13940 ARM register, so that the stack will be 64-bit aligned. We
	 try to avoid using the arg registers (r0 - r3) as they might be
13942 used to pass values in a tail call. */
13943 for (reg = 4; reg <= 12; reg++)
13944 if ((save_reg_mask & (1 << reg)) == 0)
13948 save_reg_mask |= (1 << reg);
13951 cfun->machine->sibcall_blocked = 1;
13952 save_reg_mask |= (1 << 3);
13956 /* We may need to push an additional register for use initializing the
13957 PIC base register. */
13958 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13959 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13961 reg = thumb_find_work_register (1 << 4);
13962 if (!call_used_regs[reg])
13963 save_reg_mask |= (1 << reg);
13966 return save_reg_mask;
13970 /* Compute a bit mask of which registers need to be
13971 saved on the stack for the current function. */
13972 static unsigned long
13973 thumb1_compute_save_reg_mask (void)
13975 unsigned long mask;
13979 for (reg = 0; reg < 12; reg ++)
13980 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13984 && !TARGET_SINGLE_PIC_BASE
13985 && arm_pic_register != INVALID_REGNUM
13986 && crtl->uses_pic_offset_table)
13987 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13989 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13990 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13991 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13993 /* LR will also be pushed if any lo regs are pushed. */
13994 if (mask & 0xff || thumb_force_lr_save ())
13995 mask |= (1 << LR_REGNUM);
13997 /* Make sure we have a low work register if we need one.
13998 We will need one if we are going to push a high register,
13999 but we are not currently intending to push a low register. */
14000 if ((mask & 0xff) == 0
14001 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14003 /* Use thumb_find_work_register to choose which register
14004 we will use. If the register is live then we will
14005 have to push it. Use LAST_LO_REGNUM as our fallback
14006 choice for the register to select. */
14007 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14008 /* Make sure the register returned by thumb_find_work_register is
14009 not part of the return value. */
14010 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14011 reg = LAST_LO_REGNUM;
14013 if (! call_used_regs[reg])
14017 /* The 504 below is 8 bytes less than 512 because there are two possible
14018 alignment words. We can't tell here if they will be present or not so we
14019 have to play it safe and assume that they are. */
14020 if ((CALLER_INTERWORKING_SLOT_SIZE +
14021 ROUND_UP_WORD (get_frame_size ()) +
14022 crtl->outgoing_args_size) >= 504)
14024 /* This is the same as the code in thumb1_expand_prologue() which
14025 determines which register to use for stack decrement. */
14026 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14027 if (mask & (1 << reg))
14030 if (reg > LAST_LO_REGNUM)
14032 /* Make sure we have a register available for stack decrement. */
	mask |= 1 << LAST_LO_REGNUM;
    }

  return mask;
}
14041 /* Return the number of bytes required to save VFP registers. */
14043 arm_get_vfp_saved_size (void)
14045 unsigned int regno;
14050 /* Space for saved VFP registers. */
14051 if (TARGET_HARD_FLOAT && TARGET_VFP)
14054 for (regno = FIRST_VFP_REGNUM;
14055 regno < LAST_VFP_REGNUM;
14058 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14059 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14063 /* Workaround ARM10 VFPr1 bug. */
14064 if (count == 2 && !arm_arch6)
14066 saved += count * 8;
14075 if (count == 2 && !arm_arch6)
	  saved += count * 8;
	}
    }
  return saved;
}
14084 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14085 everything bar the final return instruction. */
14087 output_return_instruction (rtx operand, int really_return, int reverse)
14089 char conditional[10];
14092 unsigned long live_regs_mask;
14093 unsigned long func_type;
14094 arm_stack_offsets *offsets;
14096 func_type = arm_current_func_type ();
14098 if (IS_NAKED (func_type))
14101 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14103 /* If this function was declared non-returning, and we have
14104 found a tail call, then we have to trust that the called
14105 function won't return. */
14110 /* Otherwise, trap an attempted return by aborting. */
14112 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14114 assemble_external_libcall (ops[1]);
14115 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14121 gcc_assert (!cfun->calls_alloca || really_return);
14123 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14125 cfun->machine->return_used_this_function = 1;
14127 offsets = arm_get_frame_offsets ();
14128 live_regs_mask = offsets->saved_regs_mask;
14130 if (live_regs_mask)
14132 const char * return_reg;
14134 /* If we do not have any special requirements for function exit
14135 (e.g. interworking) then we can load the return address
14136 directly into the PC. Otherwise we must load it into LR. */
14138 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14139 return_reg = reg_names[PC_REGNUM];
14141 return_reg = reg_names[LR_REGNUM];
14143 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14145 /* There are three possible reasons for the IP register
14146 being saved. 1) a stack frame was created, in which case
14147 IP contains the old stack pointer, or 2) an ISR routine
14148 corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     pop it.  */
14151 if (frame_pointer_needed)
14153 live_regs_mask &= ~ (1 << IP_REGNUM);
14154 live_regs_mask |= (1 << SP_REGNUM);
14157 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14160 /* On some ARM architectures it is faster to use LDR rather than
14161 LDM to load a single register. On other architectures, the
14162 cost is the same. In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
14165 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14166 if (live_regs_mask == (1U << reg))
14169 if (reg <= LAST_ARM_REGNUM
14170 && (reg != LR_REGNUM
14172 || ! IS_INTERRUPT (func_type)))
14174 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14175 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14182 /* Generate the load multiple instruction to restore the
14183 registers. Note we can get here, even if
14184 frame_pointer_needed is true, but only if sp already
14185 points to the base of the saved core registers. */
14186 if (live_regs_mask & (1 << SP_REGNUM))
14188 unsigned HOST_WIDE_INT stack_adjust;
14190 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14191 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14193 if (stack_adjust && arm_arch5 && TARGET_ARM)
14194 if (TARGET_UNIFIED_ASM)
14195 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14197 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14200 /* If we can't use ldmib (SA110 bug),
14201 then try to pop r3 instead. */
14203 live_regs_mask |= 1 << 3;
14205 if (TARGET_UNIFIED_ASM)
14206 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14208 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14212 if (TARGET_UNIFIED_ASM)
14213 sprintf (instr, "pop%s\t{", conditional);
14215 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14217 p = instr + strlen (instr);
14219 for (reg = 0; reg <= SP_REGNUM; reg++)
14220 if (live_regs_mask & (1 << reg))
14222 int l = strlen (reg_names[reg]);
14228 memcpy (p, ", ", 2);
14232 memcpy (p, "%|", 2);
14233 memcpy (p + 2, reg_names[reg], l);
14237 if (live_regs_mask & (1 << LR_REGNUM))
14239 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14240 /* If returning from an interrupt, restore the CPSR. */
14241 if (IS_INTERRUPT (func_type))
14248 output_asm_insn (instr, & operand);
14250 /* See if we need to generate an extra instruction to
14251 perform the actual function return. */
14253 && func_type != ARM_FT_INTERWORKED
14254 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14256 /* The return has already been handled
14257 by loading the LR into the PC. */
14264 switch ((int) ARM_FUNC_TYPE (func_type))
14268 /* ??? This is wrong for unified assembly syntax. */
14269 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14272 case ARM_FT_INTERWORKED:
14273 sprintf (instr, "bx%s\t%%|lr", conditional);
14276 case ARM_FT_EXCEPTION:
14277 /* ??? This is wrong for unified assembly syntax. */
14278 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14282 /* Use bx if it's available. */
14283 if (arm_arch5 || arm_arch4t)
14284 sprintf (instr, "bx%s\t%%|lr", conditional);
14286 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
  output_asm_insn (instr, & operand);

  return "";
}
14296 /* Write the function name into the code section, directly preceding
14297 the function prologue.
   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4
14310 When performing a stack backtrace, code can inspect the value
14311 of 'pc' stored at 'fp' + 0. If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is given by the low 24 bits of that word
   (pc[-3] & ~0xff000000).
14316 We assume that pc is declared as a pointer to an unsigned long.
14318 It is of no benefit to output the function name if we are assembling
14319 a leaf function. These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
14325 unsigned long alignlength;
14326 unsigned long length;
14329 length = strlen (name) + 1;
14330 alignlength = ROUND_UP_WORD (length);
14332 ASM_OUTPUT_ASCII (stream, name, length);
14333 ASM_OUTPUT_ALIGN (stream, 2);
14334 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14335 assemble_aligned_integer (UNITS_PER_WORD, x);
14338 /* Place some comments into the assembler stream
14339 describing the current function. */
14341 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14343 unsigned long func_type;
14347 thumb1_output_function_prologue (f, frame_size);
14351 /* Sanity check. */
14352 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14354 func_type = arm_current_func_type ();
14356 switch ((int) ARM_FUNC_TYPE (func_type))
14359 case ARM_FT_NORMAL:
14361 case ARM_FT_INTERWORKED:
14362 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14365 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14368 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14370 case ARM_FT_EXCEPTION:
14371 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14375 if (IS_NAKED (func_type))
14376 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14378 if (IS_VOLATILE (func_type))
14379 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14381 if (IS_NESTED (func_type))
14382 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14383 if (IS_STACKALIGN (func_type))
14384 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14386 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14388 crtl->args.pretend_args_size, frame_size);
14390 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14391 frame_pointer_needed,
14392 cfun->machine->uses_anonymous_args);
14394 if (cfun->machine->lr_save_eliminated)
14395 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14397 if (crtl->calls_eh_return)
14398 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14403 arm_output_epilogue (rtx sibling)
14406 unsigned long saved_regs_mask;
14407 unsigned long func_type;
14408 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14409 frame that is $fp + 4 for a non-variadic function. */
14410 int floats_offset = 0;
14412 FILE * f = asm_out_file;
14413 unsigned int lrm_count = 0;
14414 int really_return = (sibling == NULL);
14416 arm_stack_offsets *offsets;
14418 /* If we have already generated the return instruction
14419 then it is futile to generate anything else. */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
14424 func_type = arm_current_func_type ();
14426 if (IS_NAKED (func_type))
14427 /* Naked functions don't have epilogues. */
14430 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14434 /* A volatile function should never return. Call abort. */
14435 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14436 assemble_external_libcall (op);
14437 output_asm_insn ("bl\t%a0", &op);
14442 /* If we are throwing an exception, then we really must be doing a
14443 return, so we can't tail-call. */
14444 gcc_assert (!crtl->calls_eh_return || really_return);
14446 offsets = arm_get_frame_offsets ();
14447 saved_regs_mask = offsets->saved_regs_mask;
14450 lrm_count = bit_count (saved_regs_mask);
14452 floats_offset = offsets->saved_args;
14453 /* Compute how far away the floats will be. */
14454 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14455 if (saved_regs_mask & (1 << reg))
14456 floats_offset += 4;
14458 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14460 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14461 int vfp_offset = offsets->frame;
14463 if (TARGET_FPA_EMU2)
14465 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14466 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14468 floats_offset += 12;
14469 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14470 reg, FP_REGNUM, floats_offset - vfp_offset);
14475 start_reg = LAST_FPA_REGNUM;
14477 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14479 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14481 floats_offset += 12;
14483 /* We can't unstack more than four registers at once. */
14484 if (start_reg - reg == 3)
14486 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14487 reg, FP_REGNUM, floats_offset - vfp_offset);
14488 start_reg = reg - 1;
14493 if (reg != start_reg)
14494 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14495 reg + 1, start_reg - reg,
14496 FP_REGNUM, floats_offset - vfp_offset);
14497 start_reg = reg - 1;
14501 /* Just in case the last register checked also needs unstacking. */
14502 if (reg != start_reg)
14503 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14504 reg + 1, start_reg - reg,
14505 FP_REGNUM, floats_offset - vfp_offset);
14508 if (TARGET_HARD_FLOAT && TARGET_VFP)
14512 /* The fldmd insns do not have base+offset addressing
14513 modes, so we use IP to hold the address. */
14514 saved_size = arm_get_vfp_saved_size ();
14516 if (saved_size > 0)
14518 floats_offset += saved_size;
14519 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14520 FP_REGNUM, floats_offset - vfp_offset);
14522 start_reg = FIRST_VFP_REGNUM;
14523 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14525 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14526 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14528 if (start_reg != reg)
14529 vfp_output_fldmd (f, IP_REGNUM,
14530 (start_reg - FIRST_VFP_REGNUM) / 2,
14531 (reg - start_reg) / 2);
14532 start_reg = reg + 2;
14535 if (start_reg != reg)
14536 vfp_output_fldmd (f, IP_REGNUM,
14537 (start_reg - FIRST_VFP_REGNUM) / 2,
14538 (reg - start_reg) / 2);
14543 /* The frame pointer is guaranteed to be non-double-word aligned.
14544 This is because it is set to (old_stack_pointer - 4) and the
14545 old_stack_pointer was double word aligned. Thus the offset to
14546 the iWMMXt registers to be loaded must also be non-double-word
14547 sized, so that the resultant address *is* double-word aligned.
14548 We can ignore floats_offset since that was already included in
14549 the live_regs_mask. */
14550 lrm_count += (lrm_count % 2 ? 2 : 1);
14552 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14553 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14555 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14556 reg, FP_REGNUM, lrm_count * 4);
14561 /* saved_regs_mask should contain the IP, which at the time of stack
14562 frame generation actually contains the old stack pointer. So a
14563 quick way to unwind the stack is just pop the IP register directly
14564 into the stack pointer. */
14565 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14566 saved_regs_mask &= ~ (1 << IP_REGNUM);
14567 saved_regs_mask |= (1 << SP_REGNUM);
14569 /* There are two registers left in saved_regs_mask - LR and PC. We
14570 only need to restore the LR register (the return address), but to
14571 save time we can load it directly into the PC, unless we need a
14572 special function exit sequence, or we are not really returning. */
14574 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14575 && !crtl->calls_eh_return)
14576 /* Delete the LR from the register mask, so that the LR on
14577 the stack is loaded into the PC in the register mask. */
14578 saved_regs_mask &= ~ (1 << LR_REGNUM);
14580 saved_regs_mask &= ~ (1 << PC_REGNUM);
14582 /* We must use SP as the base register, because SP is one of the
14583 registers being restored. If an interrupt or page fault
14584 happens in the ldm instruction, the SP might or might not
14585 have been restored. That would be bad, as then SP will no
14586 longer indicate the safe area of stack, and we can get stack
14587 corruption. Using SP as the base register means that it will
14588 be reset correctly to the original value, should an interrupt
14589 occur. If the stack pointer already points at the right
14590 place, then omit the subtraction. */
14591 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14592 || cfun->calls_alloca)
14593 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14594 4 * bit_count (saved_regs_mask));
14595 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14597 if (IS_INTERRUPT (func_type))
14598 /* Interrupt handlers will have pushed the
14599 IP onto the stack, so restore it now. */
14600 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14604 /* This branch is executed for ARM mode (non-apcs frames) and
14605 Thumb-2 mode. Frame layout is essentially the same for those
14606 cases, except that in ARM mode frame pointer points to the
14607 first saved register, while in Thumb-2 mode the frame pointer points
14608 to the last saved register.
14610 It is possible to make frame pointer point to last saved
14611 register in both cases, and remove some conditionals below.
14612 That means that fp setup in prologue would be just "mov fp, sp"
14613 and sp restore in epilogue would be just "mov sp, fp", whereas
14614 now we have to use add/sub in those cases. However, the value
14615 of that would be marginal, as both mov and add/sub are 32-bit
14616 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish the ARM-apcs-frame case
	 (where the frame pointer is required to point at the first saved
	 register) from the ARM-non-apcs-frame case.  Therefore, such a
	 change is postponed until a real need arises.  */
14621 unsigned HOST_WIDE_INT amount;
14623 /* Restore stack pointer if necessary. */
14624 if (TARGET_ARM && frame_pointer_needed)
14626 operands[0] = stack_pointer_rtx;
14627 operands[1] = hard_frame_pointer_rtx;
14629 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14630 output_add_immediate (operands);
14634 if (frame_pointer_needed)
	  /* For Thumb-2 restore sp from the frame pointer.
	     Operand restrictions mean we have to increment FP, then copy
	     it to SP.  */
14639 amount = offsets->locals_base - offsets->saved_regs;
14640 operands[0] = hard_frame_pointer_rtx;
14644 unsigned long count;
14645 operands[0] = stack_pointer_rtx;
14646 amount = offsets->outgoing_args - offsets->saved_regs;
14647 /* pop call clobbered registers if it avoids a
14648 separate stack adjustment. */
14649 count = offsets->saved_regs - offsets->saved_args;
14652 && !crtl->calls_eh_return
14653 && bit_count(saved_regs_mask) * 4 == count
14654 && !IS_INTERRUPT (func_type)
14655 && !crtl->tail_call_emit)
14657 unsigned long mask;
14658 /* Preserve return values, of any size. */
14659 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14661 mask &= ~saved_regs_mask;
14663 while (bit_count (mask) * 4 > amount)
14665 while ((mask & (1 << reg)) == 0)
14667 mask &= ~(1 << reg);
	      if (bit_count (mask) * 4 == amount)
		{
		  amount = 0;
		  saved_regs_mask |= mask;
		}
14678 operands[1] = operands[0];
14679 operands[2] = GEN_INT (amount);
14680 output_add_immediate (operands);
14682 if (frame_pointer_needed)
14683 asm_fprintf (f, "\tmov\t%r, %r\n",
14684 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14687 if (TARGET_FPA_EMU2)
14689 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14690 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14691 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14696 start_reg = FIRST_FPA_REGNUM;
14698 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14700 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14702 if (reg - start_reg == 3)
14704 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14705 start_reg, SP_REGNUM);
14706 start_reg = reg + 1;
14711 if (reg != start_reg)
14712 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14713 start_reg, reg - start_reg,
14716 start_reg = reg + 1;
14720 /* Just in case the last register checked also needs unstacking. */
14721 if (reg != start_reg)
14722 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14723 start_reg, reg - start_reg, SP_REGNUM);
14726 if (TARGET_HARD_FLOAT && TARGET_VFP)
14728 int end_reg = LAST_VFP_REGNUM + 1;
14730 /* Scan the registers in reverse order. We need to match
14731 any groupings made in the prologue and generate matching
14733 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14735 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14736 && (!df_regs_ever_live_p (reg + 1)
14737 || call_used_regs[reg + 1]))
14739 if (end_reg > reg + 2)
14740 vfp_output_fldmd (f, SP_REGNUM,
14741 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14742 (end_reg - (reg + 2)) / 2);
14746 if (end_reg > reg + 2)
14747 vfp_output_fldmd (f, SP_REGNUM, 0,
14748 (end_reg - (reg + 2)) / 2);
14752 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14753 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14754 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14756 /* If we can, restore the LR into the PC. */
14757 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14758 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14759 && !IS_STACKALIGN (func_type)
14761 && crtl->args.pretend_args_size == 0
14762 && saved_regs_mask & (1 << LR_REGNUM)
14763 && !crtl->calls_eh_return)
14765 saved_regs_mask &= ~ (1 << LR_REGNUM);
14766 saved_regs_mask |= (1 << PC_REGNUM);
14767 rfe = IS_INTERRUPT (func_type);
  /* Load the registers off the stack.  If we only have one register
     to load, use the LDR instruction - it is faster.  For Thumb-2,
     always use pop and the assembler will pick the best instruction.  */
14775 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14776 && !IS_INTERRUPT(func_type))
14778 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14780 else if (saved_regs_mask)
14782 if (saved_regs_mask & (1 << SP_REGNUM))
14783 /* Note - write back to the stack register is not enabled
14784 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14785 in the list of registers and if we add writeback the
14786 instruction becomes UNPREDICTABLE. */
14787 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14789 else if (TARGET_ARM)
14790 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14793 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14796 if (crtl->args.pretend_args_size)
14798 /* Unwind the pre-pushed regs. */
14799 operands[0] = operands[1] = stack_pointer_rtx;
14800 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14801 output_add_immediate (operands);
14805 /* We may have already restored PC directly from the stack. */
14806 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14809 /* Stack adjustment for exception handler. */
14810 if (crtl->calls_eh_return)
14811 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14812 ARM_EH_STACKADJ_REGNUM);
14814 /* Generate the return instruction. */
14815 switch ((int) ARM_FUNC_TYPE (func_type))
14819 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14822 case ARM_FT_EXCEPTION:
14823 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14826 case ARM_FT_INTERWORKED:
14827 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14831 if (IS_STACKALIGN (func_type))
14833 /* See comment in arm_expand_prologue. */
14834 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14836 if (arm_arch5 || arm_arch4t)
14837 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14839 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14847 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14848 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14850 arm_stack_offsets *offsets;
14856 /* Emit any call-via-reg trampolines that are needed for v4t support
14857 of call_reg and call_value_reg type insns. */
14858 for (regno = 0; regno < LR_REGNUM; regno++)
14860 rtx label = cfun->machine->call_via[regno];
14864 switch_to_section (function_section (current_function_decl));
14865 targetm.asm_out.internal_label (asm_out_file, "L",
14866 CODE_LABEL_NUMBER (label));
14867 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14871 /* ??? Probably not safe to set this here, since it assumes that a
14872 function will be emitted as assembly immediately after we generate
14873 RTL for it. This does not happen for inline functions. */
14874 cfun->machine->return_used_this_function = 0;
14876 else /* TARGET_32BIT */
14878 /* We need to take into account any stack-frame rounding. */
14879 offsets = arm_get_frame_offsets ();
14881 gcc_assert (!use_return_insn (FALSE, NULL)
14882 || (cfun->machine->return_used_this_function != 0)
14883 || offsets->saved_regs == offsets->outgoing_args
14884 || frame_pointer_needed);
14886 /* Reset the ARM-specific per-function variables. */
14887 after_arm_reorg = 0;
14891 /* Generate and emit an insn that we will recognize as a push_multi.
14892 Unfortunately, since this insn does not reflect very well the actual
14893 semantics of the operation, we need to annotate the insn for the benefit
14894 of DWARF2 frame unwind information. */
14896 emit_multi_reg_push (unsigned long mask)
14899 int num_dwarf_regs;
14903 int dwarf_par_index;
14906 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14907 if (mask & (1 << i))
14910 gcc_assert (num_regs && num_regs <= 16);
14912 /* We don't record the PC in the dwarf frame information. */
14913 num_dwarf_regs = num_regs;
14914 if (mask & (1 << PC_REGNUM))
14917 /* For the body of the insn we are going to generate an UNSPEC in
14918 parallel with several USEs. This allows the insn to be recognized
14919 by the push_multi pattern in the arm.md file.
14921 The body of the insn looks something like this:
14924 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14925 (const_int:SI <num>)))
14926 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14932 For the frame note however, we try to be more explicit and actually
14933 show each register being stored into the stack frame, plus a (single)
14934 decrement of the stack pointer. We do it this way in order to be
14935 friendly to the stack unwinding code, which only wants to see a single
14936 stack decrement per instruction. The RTL we generate for the note looks
14937 something like this:
14940 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14941 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14942 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14943 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
     FIXME: In an ideal world the PRE_MODIFY would not exist and
14948 instead we'd have a parallel expression detailing all
14949 the stores to the various memory addresses so that debug
14950 information is more up-to-date. Remember however while writing
14951 this to take care of the constraints with the push instruction.
14953 Note also that this has to be taken care of for the VFP registers.
14955 For more see PR43399. */
14957 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14958 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14959 dwarf_par_index = 1;
14961 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14963 if (mask & (1 << i))
14965 reg = gen_rtx_REG (SImode, i);
14967 XVECEXP (par, 0, 0)
14968 = gen_rtx_SET (VOIDmode,
14971 gen_rtx_PRE_MODIFY (Pmode,
14974 (stack_pointer_rtx,
14977 gen_rtx_UNSPEC (BLKmode,
14978 gen_rtvec (1, reg),
14979 UNSPEC_PUSH_MULT));
14981 if (i != PC_REGNUM)
14983 tmp = gen_rtx_SET (VOIDmode,
14984 gen_frame_mem (SImode, stack_pointer_rtx),
14986 RTX_FRAME_RELATED_P (tmp) = 1;
14987 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14995 for (j = 1, i++; j < num_regs; i++)
14997 if (mask & (1 << i))
14999 reg = gen_rtx_REG (SImode, i);
15001 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15003 if (i != PC_REGNUM)
15006 = gen_rtx_SET (VOIDmode,
15009 plus_constant (stack_pointer_rtx,
15012 RTX_FRAME_RELATED_P (tmp) = 1;
15013 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15020 par = emit_insn (par);
15022 tmp = gen_rtx_SET (VOIDmode,
15024 plus_constant (stack_pointer_rtx, -4 * num_regs));
15025 RTX_FRAME_RELATED_P (tmp) = 1;
15026 XVECEXP (dwarf, 0, 0) = tmp;
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
15033 /* Calculate the size of the return value that is passed in registers. */
15035 arm_size_return_regs (void)
15037 enum machine_mode mode;
15039 if (crtl->return_rtx != 0)
15040 mode = GET_MODE (crtl->return_rtx);
15042 mode = DECL_MODE (DECL_RESULT (current_function_decl));
  return GET_MODE_SIZE (mode);
}
15048 emit_sfm (int base_reg, int count)
15055 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15056 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15058 reg = gen_rtx_REG (XFmode, base_reg++);
15060 XVECEXP (par, 0, 0)
15061 = gen_rtx_SET (VOIDmode,
15064 gen_rtx_PRE_MODIFY (Pmode,
15067 (stack_pointer_rtx,
15070 gen_rtx_UNSPEC (BLKmode,
15071 gen_rtvec (1, reg),
15072 UNSPEC_PUSH_MULT));
15073 tmp = gen_rtx_SET (VOIDmode,
15074 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15075 RTX_FRAME_RELATED_P (tmp) = 1;
15076 XVECEXP (dwarf, 0, 1) = tmp;
15078 for (i = 1; i < count; i++)
15080 reg = gen_rtx_REG (XFmode, base_reg++);
15081 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15083 tmp = gen_rtx_SET (VOIDmode,
15084 gen_frame_mem (XFmode,
15085 plus_constant (stack_pointer_rtx,
15088 RTX_FRAME_RELATED_P (tmp) = 1;
15089 XVECEXP (dwarf, 0, i + 1) = tmp;
15092 tmp = gen_rtx_SET (VOIDmode,
15094 plus_constant (stack_pointer_rtx, -12 * count));
15096 RTX_FRAME_RELATED_P (tmp) = 1;
15097 XVECEXP (dwarf, 0, 0) = tmp;
15099 par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
15106 /* Return true if the current function needs to save/restore LR. */
15109 thumb_force_lr_save (void)
15111 return !cfun->machine->lr_save_eliminated
15112 && (!leaf_function_p ()
15113 || thumb_far_jump_used_p ()
15114 || df_regs_ever_live_p (LR_REGNUM));
15118 /* Compute the distance from register FROM to register TO.
15119 These can be the arg pointer (26), the soft frame pointer (25),
15120 the stack pointer (13) or the hard frame pointer (11).
15121 In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
15152 For a given function some or all of these stack components
15153 may not be needed, giving rise to the possibility of
15154 eliminating some of the registers.
15156 The values returned by this function must reflect the behavior
15157 of arm_expand_prologue() and arm_compute_save_reg_mask().
15159 The sign of the number returned reflects the direction of stack
15160 growth, so the values are positive for all eliminations except
15161 from the soft frame pointer to the hard frame pointer.
   SFP may point just inside the local variables block to ensure correct
   alignment.  */
15167 /* Calculate stack offsets. These are used to calculate register elimination
15168 offsets and in prologue/epilogue code. Also calculates which registers
15169 should be saved. */
15171 static arm_stack_offsets *
15172 arm_get_frame_offsets (void)
15174 struct arm_stack_offsets *offsets;
15175 unsigned long func_type;
15179 HOST_WIDE_INT frame_size;
15182 offsets = &cfun->machine->stack_offsets;
15184 /* We need to know if we are a leaf function. Unfortunately, it
15185 is possible to be called after start_sequence has been called,
15186 which causes get_insns to return the insns for the sequence,
15187 not the function, which will cause leaf_function_p to return
     the incorrect result.  To work around this we cache the computed
     value, since we only need
     to know about leaf functions once reload has completed, and the
15191 frame size cannot be changed after that time, so we can safely
15192 use the cached value. */
15194 if (reload_completed)
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
15199 frame_size = ROUND_UP_WORD (get_frame_size ());
15201 leaf = leaf_function_p ();
15203 /* Space for variadic functions. */
15204 offsets->saved_args = crtl->args.pretend_args_size;
15206 /* In Thumb mode this is incorrect, but never used. */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
		   + arm_compute_static_chain_stack_bytes ();
15212 unsigned int regno;
15214 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15215 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15216 saved = core_saved;
15218 /* We know that SP will be doubleword aligned on entry, and we must
15219 preserve that condition at any subroutine call. We also require the
15220 soft frame pointer to be doubleword aligned. */
15222 if (TARGET_REALLY_IWMMXT)
15224 /* Check for the call-saved iWMMXt registers. */
15225 for (regno = FIRST_IWMMXT_REGNUM;
15226 regno <= LAST_IWMMXT_REGNUM;
15228 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15232 func_type = arm_current_func_type ();
15233 if (! IS_VOLATILE (func_type))
15235 /* Space for saved FPA registers. */
15236 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15237 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15240 /* Space for saved VFP registers. */
15241 if (TARGET_HARD_FLOAT && TARGET_VFP)
15242 saved += arm_get_vfp_saved_size ();
15245 else /* TARGET_THUMB1 */
15247 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15248 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15249 saved = core_saved;
15250 if (TARGET_BACKTRACE)
15254 /* Saved registers include the stack frame. */
  offsets->saved_regs = offsets->saved_args + saved
			+ arm_compute_static_chain_stack_bytes ();
15257 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
15260 if (leaf && frame_size == 0)
15262 offsets->outgoing_args = offsets->soft_frame;
15263 offsets->locals_base = offsets->soft_frame;
15267 /* Ensure SFP has the correct alignment. */
15268 if (ARM_DOUBLEWORD_ALIGN
15269 && (offsets->soft_frame & 7))
15271 offsets->soft_frame += 4;
15272 /* Try to align stack by pushing an extra reg. Don't bother doing this
15273 when there is a stack frame as the alignment will be rolled into
15274 the normal stack adjustment. */
15275 if (frame_size + crtl->outgoing_args_size == 0)
15279 /* If it is safe to use r3, then do so. This sometimes
15280 generates better code on Thumb-2 by avoiding the need to
15281 use 32-bit push/pop instructions. */
15282 if (!crtl->tail_call_emit
15283 && arm_size_return_regs () <= 12
15284 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15289 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15291 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15300 offsets->saved_regs += 4;
15301 offsets->saved_regs_mask |= (1 << reg);
15306 offsets->locals_base = offsets->soft_frame + frame_size;
15307 offsets->outgoing_args = (offsets->locals_base
15308 + crtl->outgoing_args_size);
15310 if (ARM_DOUBLEWORD_ALIGN)
15312 /* Ensure SP remains doubleword aligned. */
15313 if (offsets->outgoing_args & 7)
15314 offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
15322 /* Calculate the relative offsets for the different stack pointers. Positive
15323 offsets are in the direction of stack growth. */
15326 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15328 arm_stack_offsets *offsets;
15330 offsets = arm_get_frame_offsets ();
15332 /* OK, now we have enough information to compute the distances.
15333 There must be an entry in these switch tables for each pair
15334 of registers in ELIMINABLE_REGS, even if some of the entries
15335 seem to be redundant or useless. */
15338 case ARG_POINTER_REGNUM:
15341 case THUMB_HARD_FRAME_POINTER_REGNUM:
15344 case FRAME_POINTER_REGNUM:
15345 /* This is the reverse of the soft frame pointer
15346 to hard frame pointer elimination below. */
15347 return offsets->soft_frame - offsets->saved_args;
15349 case ARM_HARD_FRAME_POINTER_REGNUM:
15350 /* This is only non-zero in the case where the static chain register
15351 is stored above the frame. */
15352 return offsets->frame - offsets->saved_args - 4;
15354 case STACK_POINTER_REGNUM:
15355 /* If nothing has been pushed on the stack at all
15356 then this will return -4. This *is* correct! */
15357 return offsets->outgoing_args - (offsets->saved_args + 4);
15360 gcc_unreachable ();
15362 gcc_unreachable ();
15364 case FRAME_POINTER_REGNUM:
15367 case THUMB_HARD_FRAME_POINTER_REGNUM:
15370 case ARM_HARD_FRAME_POINTER_REGNUM:
15371 /* The hard frame pointer points to the top entry in the
15372 stack frame. The soft frame pointer to the bottom entry
15373 in the stack frame. If there is no stack frame at all,
15374 then they are identical. */
15376 return offsets->frame - offsets->soft_frame;
15378 case STACK_POINTER_REGNUM:
15379 return offsets->outgoing_args - offsets->soft_frame;
15382 gcc_unreachable ();
15384 gcc_unreachable ();
15387 /* You cannot eliminate from the stack pointer.
15388 In theory you could eliminate from the hard frame
15389 pointer to the stack pointer, but this will never
15390 happen, since if a stack frame is not needed the
15391 hard frame pointer will never be used. */
      gcc_unreachable ();
    }
}
15396 /* Given FROM and TO register numbers, say whether this elimination is
15397 allowed. Frame pointer elimination is automatically handled.
15399 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15400 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15401 pointer, we must eliminate FRAME_POINTER_REGNUM into
15402 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15403 ARG_POINTER_REGNUM. */
15406 arm_can_eliminate (const int from, const int to)
15408 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15409 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15410 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
15415 /* Emit RTL to save coprocessor registers on function entry. Returns the
15416 number of bytes pushed. */
15419 arm_save_coproc_regs(void)
15421 int saved_size = 0;
15423 unsigned start_reg;
15426 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15427 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15429 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15430 insn = gen_rtx_MEM (V2SImode, insn);
15431 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15432 RTX_FRAME_RELATED_P (insn) = 1;
  /* Save any floating point call-saved registers used by this
     function.  */
15438 if (TARGET_FPA_EMU2)
15440 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15441 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15443 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15444 insn = gen_rtx_MEM (XFmode, insn);
15445 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15446 RTX_FRAME_RELATED_P (insn) = 1;
15452 start_reg = LAST_FPA_REGNUM;
15454 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15456 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15458 if (start_reg - reg == 3)
15460 insn = emit_sfm (reg, 4);
15461 RTX_FRAME_RELATED_P (insn) = 1;
15463 start_reg = reg - 1;
15468 if (start_reg != reg)
15470 insn = emit_sfm (reg + 1, start_reg - reg);
15471 RTX_FRAME_RELATED_P (insn) = 1;
15472 saved_size += (start_reg - reg) * 12;
15474 start_reg = reg - 1;
15478 if (start_reg != reg)
15480 insn = emit_sfm (reg + 1, start_reg - reg);
15481 saved_size += (start_reg - reg) * 12;
15482 RTX_FRAME_RELATED_P (insn) = 1;
15485 if (TARGET_HARD_FLOAT && TARGET_VFP)
15487 start_reg = FIRST_VFP_REGNUM;
15489 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15491 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15492 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15494 if (start_reg != reg)
15495 saved_size += vfp_emit_fstmd (start_reg,
15496 (reg - start_reg) / 2);
15497 start_reg = reg + 2;
15500 if (start_reg != reg)
15501 saved_size += vfp_emit_fstmd (start_reg,
15502 (reg - start_reg) / 2);
15508 /* Set the Thumb frame pointer from the stack pointer. */
15511 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15513 HOST_WIDE_INT amount;
15516 amount = offsets->outgoing_args - offsets->locals_base;
15518 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15519 stack_pointer_rtx, GEN_INT (amount)));
15522 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15523 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15524 expects the first two operands to be the same. */
15527 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15529 hard_frame_pointer_rtx));
15533 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15534 hard_frame_pointer_rtx,
15535 stack_pointer_rtx));
15537 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15538 plus_constant (stack_pointer_rtx, amount));
15539 RTX_FRAME_RELATED_P (dwarf) = 1;
15540 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15543 RTX_FRAME_RELATED_P (insn) = 1;
15546 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15549 arm_expand_prologue (void)
15554 unsigned long live_regs_mask;
15555 unsigned long func_type;
15557 int saved_pretend_args = 0;
15558 int saved_regs = 0;
15559 unsigned HOST_WIDE_INT args_to_push;
15560 arm_stack_offsets *offsets;
15562 func_type = arm_current_func_type ();
15564 /* Naked functions don't have prologues. */
15565 if (IS_NAKED (func_type))
15568 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15569 args_to_push = crtl->args.pretend_args_size;
  /* Compute which registers we will have to save onto the stack.  */
15572 offsets = arm_get_frame_offsets ();
15573 live_regs_mask = offsets->saved_regs_mask;
15575 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15577 if (IS_STACKALIGN (func_type))
      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
15592 Just tell it we saved SP in r0. */
15593 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15595 r0 = gen_rtx_REG (SImode, 0);
15596 r1 = gen_rtx_REG (SImode, 1);
15597 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15598 compiler won't choke. */
15599 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15600 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15601 insn = gen_movsi (r0, stack_pointer_rtx);
15602 RTX_FRAME_RELATED_P (insn) = 1;
15603 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15605 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15606 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15609 /* For APCS frames, if IP register is clobbered
15610 when creating frame, save that register in a special
15612 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15614 if (IS_INTERRUPT (func_type))
15616 /* Interrupt functions must not corrupt any registers.
15617 Creating a frame pointer however, corrupts the IP
15618 register, so we must push it first. */
15619 insn = emit_multi_reg_push (1 << IP_REGNUM);
15621 /* Do not set RTX_FRAME_RELATED_P on this insn.
15622 The dwarf stack unwinding code only wants to see one
15623 stack decrement per function, and this is not it. If
15624 this instruction is labeled as being part of the frame
15625 creation sequence then dwarf2out_frame_debug_expr will
15626 die when it encounters the assignment of IP to FP
15627 later on, since the use of SP here establishes SP as
15628 the CFA register and not IP.
15630 Anyway this instruction is not really part of the stack
15631 frame creation although it is part of the prologue. */
15633 else if (IS_NESTED (func_type))
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
15638 whilst the frame is being created. We try the following
15641 1. The last argument register.
15642 2. A slot on the stack above the frame. (This only
15643 works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
	          onto the stack.
15647 Note - we only need to tell the dwarf2 backend about the SP
15648 adjustment in the second variant; the static chain register
15649 doesn't need to be unwound, as it doesn't contain a value
15650 inherited from the caller. */
15652 if (df_regs_ever_live_p (3) == false)
15653 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15654 else if (args_to_push == 0)
15658 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15661 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15662 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15665 /* Just tell the dwarf backend that we adjusted SP. */
15666 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15667 plus_constant (stack_pointer_rtx,
15669 RTX_FRAME_RELATED_P (insn) = 1;
15670 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15674 /* Store the args on the stack. */
15675 if (cfun->machine->uses_anonymous_args)
15676 insn = emit_multi_reg_push
15677 ((0xf0 >> (args_to_push / 4)) & 0xf);
15680 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15681 GEN_INT (- args_to_push)));
15683 RTX_FRAME_RELATED_P (insn) = 1;
15685 saved_pretend_args = 1;
15686 fp_offset = args_to_push;
15689 /* Now reuse r3 to preserve IP. */
15690 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15694 insn = emit_set_insn (ip_rtx,
15695 plus_constant (stack_pointer_rtx, fp_offset));
15696 RTX_FRAME_RELATED_P (insn) = 1;
15701 /* Push the argument registers, or reserve space for them. */
15702 if (cfun->machine->uses_anonymous_args)
15703 insn = emit_multi_reg_push
15704 ((0xf0 >> (args_to_push / 4)) & 0xf);
15707 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15708 GEN_INT (- args_to_push)));
15709 RTX_FRAME_RELATED_P (insn) = 1;
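/* A worked example of the anonymous-args mask used above: with
   args_to_push == 8 (two words of pretend args), 0xf0 >> (8 / 4)
   is 0x3c, and masking with 0xf leaves 0xc, i.e. a push of
   {r2, r3} -- the last two argument registers.  */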
15712 /* If this is an interrupt service routine, and the link register
15713 is going to be pushed, and we're not generating an extra
15714 push of IP (needed when the frame pointer is needed and the frame layout is APCS),
15715 subtracting four from LR now will mean that the function return
15716 can be done with a single instruction. */
15717 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15718 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15719 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15722 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15724 emit_set_insn (lr, plus_constant (lr, -4));
15727 if (live_regs_mask)
15729 saved_regs += bit_count (live_regs_mask) * 4;
15730 if (optimize_size && !frame_pointer_needed
15731 && saved_regs == offsets->saved_regs - offsets->saved_args)
15733 /* If no coprocessor registers are being pushed and we don't have
15734 to worry about a frame pointer then push extra registers to
15735 create the stack frame.  This is done in a way that does not
15736 alter the frame layout, so is independent of the epilogue. */
15740 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15742 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15743 if (frame && n * 4 >= frame)
15746 live_regs_mask |= (1 << n) - 1;
15747 saved_regs += frame;
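/* For example, if the lowest register saved so far is r4 (n == 4
   above), a leftover frame of up to 16 bytes can be absorbed by
   also pushing r0-r3 ((1 << 4) - 1 == 0xf), avoiding a separate
   stack adjustment.  */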
15750 insn = emit_multi_reg_push (live_regs_mask);
15751 RTX_FRAME_RELATED_P (insn) = 1;
15754 if (! IS_VOLATILE (func_type))
15755 saved_regs += arm_save_coproc_regs ();
15757 if (frame_pointer_needed && TARGET_ARM)
15759 /* Create the new frame pointer. */
15760 if (TARGET_APCS_FRAME)
15762 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15763 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15764 RTX_FRAME_RELATED_P (insn) = 1;
15766 if (IS_NESTED (func_type))
15768 /* Recover the static chain register. */
15769 if (!df_regs_ever_live_p (3)
15770 || saved_pretend_args)
15771 insn = gen_rtx_REG (SImode, 3);
15772 else /* if (crtl->args.pretend_args_size == 0) */
15774 insn = plus_constant (hard_frame_pointer_rtx, 4);
15775 insn = gen_frame_mem (SImode, insn);
15777 emit_set_insn (ip_rtx, insn);
15778 /* Add a USE to stop propagate_one_insn() from barfing. */
15779 emit_insn (gen_prologue_use (ip_rtx));
15784 insn = GEN_INT (saved_regs - 4);
15785 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15786 stack_pointer_rtx, insn));
15787 RTX_FRAME_RELATED_P (insn) = 1;
15791 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15793 /* This add can produce multiple insns for a large constant, so we
15794 need to get tricky. */
15795 rtx last = get_last_insn ();
15797 amount = GEN_INT (offsets->saved_args + saved_regs
15798 - offsets->outgoing_args);
15800 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15804 last = last ? NEXT_INSN (last) : get_insns ();
15805 RTX_FRAME_RELATED_P (last) = 1;
15807 while (last != insn);
15809 /* If the frame pointer is needed, emit a special barrier that
15810 will prevent the scheduler from moving stores to the frame
15811 before the stack adjustment. */
15812 if (frame_pointer_needed)
15813 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15814 hard_frame_pointer_rtx));
15818 if (frame_pointer_needed && TARGET_THUMB2)
15819 thumb_set_frame_pointer (offsets);
15821 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15823 unsigned long mask;
15825 mask = live_regs_mask;
15826 mask &= THUMB2_WORK_REGS;
15827 if (!IS_NESTED (func_type))
15828 mask |= (1 << IP_REGNUM);
15829 arm_load_pic_register (mask);
15832 /* If we are profiling, make sure no instructions are scheduled before
15833 the call to mcount. Similarly if the user has requested no
15834 scheduling in the prolog. Similarly if we want non-call exceptions
15835 using the EABI unwinder, to prevent faulting instructions from being
15836 swapped with a stack adjustment. */
15837 if (crtl->profile || !TARGET_SCHED_PROLOG
15838 || (arm_except_unwind_info (&global_options) == UI_TARGET
15839 && cfun->can_throw_non_call_exceptions))
15840 emit_insn (gen_blockage ());
15842 /* If the link register is being kept alive, with the return address in it,
15843 then make sure that it does not get reused by the ce2 pass. */
15844 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15845 cfun->machine->lr_save_eliminated = 1;
15848 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15850 arm_print_condition (FILE *stream)
15852 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15854 /* Branch conversion is not implemented for Thumb-2. */
15857 output_operand_lossage ("predicated Thumb instruction");
15860 if (current_insn_predicate != NULL)
15862 output_operand_lossage
15863 ("predicated instruction in conditional sequence");
15867 fputs (arm_condition_codes[arm_current_cc], stream);
15869 else if (current_insn_predicate)
15871 enum arm_cond_code code;
15875 output_operand_lossage ("predicated Thumb instruction");
15879 code = get_arm_condition_code (current_insn_predicate);
15880 fputs (arm_condition_codes[code], stream);
15885 /* If CODE is 'd', then the X is a condition operand and the instruction
15886 should only be executed if the condition is true.
15887 If CODE is 'D', then the X is a condition operand and the instruction
15888 should only be executed if the condition is false: however, if the mode
15889 of the comparison is CCFPEmode, then always execute the instruction -- we
15890 do this because in these circumstances !GE does not necessarily imply LT;
15891 in these cases the instruction pattern will take care to make sure that
15892 an instruction containing %d will follow, thereby undoing the effects of
15893 doing this instruction unconditionally.
15894 If CODE is 'N' then X is a floating point operand that must be negated before output.
15896 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15897 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
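/* For example, '%B' applied to (const_int 10) prints -11 (the
   sign-extended bitwise inverse), and '%M' applied to (reg:DI r0)
   prints "{r0-r1}".  */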
15899 arm_print_operand (FILE *stream, rtx x, int code)
15904 fputs (ASM_COMMENT_START, stream);
15908 fputs (user_label_prefix, stream);
15912 fputs (REGISTER_PREFIX, stream);
15916 arm_print_condition (stream);
15920 /* Nothing in unified syntax, otherwise the current condition code. */
15921 if (!TARGET_UNIFIED_ASM)
15922 arm_print_condition (stream);
15926 /* The current condition code in unified syntax, otherwise nothing. */
15927 if (TARGET_UNIFIED_ASM)
15928 arm_print_condition (stream);
15932 /* The current condition code for a condition code setting instruction.
15933 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15934 if (TARGET_UNIFIED_ASM)
15936 fputc ('s', stream);
15937 arm_print_condition (stream);
15941 arm_print_condition (stream);
15942 fputc ('s', stream);
15947 /* If the instruction is conditionally executed then print
15948 the current condition code, otherwise print 's'. */
15949 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15950 if (current_insn_predicate)
15951 arm_print_condition (stream);
15953 fputc ('s', stream);
15956 /* %# is a "break" sequence. It doesn't output anything, but is used to
15957 separate e.g. operand numbers from following text, if that text consists
15958 of further digits which we don't want to be part of the operand number.  */
15966 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15967 r = real_value_negate (&r);
15968 fprintf (stream, "%s", fp_const_from_val (&r));
15972 /* An integer or symbol address without a preceding # sign. */
15974 switch (GET_CODE (x))
15977 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15981 output_addr_const (stream, x);
15985 gcc_unreachable ();
15990 if (GET_CODE (x) == CONST_INT)
15993 val = ARM_SIGN_EXTEND (~INTVAL (x));
15994 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15998 putc ('~', stream);
15999 output_addr_const (stream, x);
16004 /* The low 16 bits of an immediate constant. */
16005 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
16009 fprintf (stream, "%s", arithmetic_instr (x, 1));
16012 /* Truncate Cirrus shift counts. */
16014 if (GET_CODE (x) == CONST_INT)
16016 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16019 arm_print_operand (stream, x, 0);
16023 fprintf (stream, "%s", arithmetic_instr (x, 0));
16031 if (!shift_operator (x, SImode))
16033 output_operand_lossage ("invalid shift operand");
16037 shift = shift_op (x, &val);
16041 fprintf (stream, ", %s ", shift);
16043 arm_print_operand (stream, XEXP (x, 1), 0);
16045 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16050 /* An explanation of the 'Q', 'R' and 'H' register operands:
16052 In a pair of registers containing a DI or DF value the 'Q'
16053 operand returns the register number of the register containing
16054 the least significant part of the value. The 'R' operand returns
16055 the register number of the register containing the most
16056 significant part of the value.
16058 The 'H' operand returns the higher of the two register numbers.
16059 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16060 same as the 'Q' operand, since the most significant part of the
16061 value is held in the lower number register. The reverse is true
16062 on systems where WORDS_BIG_ENDIAN is false.
16064 The purpose of these operands is to distinguish between cases
16065 where the endian-ness of the values is important (for example
16066 when they are added together), and cases where the endian-ness
16067 is irrelevant, but the order of register operations is important.
16068 For example when loading a value from memory into a register
16069 pair, the endian-ness does not matter. Provided that the value
16070 from the lower memory address is put into the lower numbered
16071 register, and the value from the higher address is put into the
16072 higher numbered register, the load will work regardless of whether
16073 the value being loaded is big-wordian or little-wordian. The
16074 order of the two register loads can matter however, if the address
16075 of the memory location is actually held in one of the registers
16076 being overwritten by the load.
16078 The 'Q' and 'R' constraints are also available for 64-bit constants.  */
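/* Concretely, for a little-endian DImode value held in {r0, r1}:
   '%Q' prints r0 (least significant half), '%R' prints r1 (most
   significant half), and '%H' prints r1 -- the higher register
   number -- whatever the endianness.  */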
16081 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16083 rtx part = gen_lowpart (SImode, x);
16084 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16088 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16090 output_operand_lossage ("invalid operand for code '%c'", code);
16094 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16098 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16100 enum machine_mode mode = GET_MODE (x);
16103 if (mode == VOIDmode)
16105 part = gen_highpart_mode (SImode, mode, x);
16106 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16110 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16112 output_operand_lossage ("invalid operand for code '%c'", code);
16116 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16120 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16122 output_operand_lossage ("invalid operand for code '%c'", code);
16126 asm_fprintf (stream, "%r", REGNO (x) + 1);
16130 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16132 output_operand_lossage ("invalid operand for code '%c'", code);
16136 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16140 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16142 output_operand_lossage ("invalid operand for code '%c'", code);
16146 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16150 asm_fprintf (stream, "%r",
16151 GET_CODE (XEXP (x, 0)) == REG
16152 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16156 asm_fprintf (stream, "{%r-%r}",
16158 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16161 /* Like 'M', but writing doubleword vector registers, for use by Neon insns.  */
16165 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16166 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16168 asm_fprintf (stream, "{d%d}", regno);
16170 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16175 /* CONST_TRUE_RTX means always -- that's the default. */
16176 if (x == const_true_rtx)
16179 if (!COMPARISON_P (x))
16181 output_operand_lossage ("invalid operand for code '%c'", code);
16185 fputs (arm_condition_codes[get_arm_condition_code (x)],
16190 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16191 want to do that. */
16192 if (x == const_true_rtx)
16194 output_operand_lossage ("instruction never executed");
16197 if (!COMPARISON_P (x))
16199 output_operand_lossage ("invalid operand for code '%c'", code);
16203 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16204 (get_arm_condition_code (x))],
16208 /* Cirrus registers can be accessed in a variety of ways:
16209 single floating point (f)
16210 double floating point (d)
32bit integer (fx)
16212 64bit integer (dx).  */
16213 case 'W': /* Cirrus register in F mode. */
16214 case 'X': /* Cirrus register in D mode. */
16215 case 'Y': /* Cirrus register in FX mode. */
16216 case 'Z': /* Cirrus register in DX mode. */
16217 gcc_assert (GET_CODE (x) == REG
16218 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16220 fprintf (stream, "mv%s%s",
16222 : code == 'X' ? "d"
16223 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16227 /* Print a Cirrus register, choosing the name variant (f, d, fx or dx) from the operand's mode.  */
16230 int mode = GET_MODE (x);
16232 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16234 output_operand_lossage ("invalid operand for code '%c'", code);
16238 fprintf (stream, "mv%s%s",
16239 mode == DFmode ? "d"
16240 : mode == SImode ? "fx"
16241 : mode == DImode ? "dx"
16242 : "f", reg_names[REGNO (x)] + 2);
16248 if (GET_CODE (x) != REG
16249 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16250 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16251 /* Bad value for wCG register number. */
16253 output_operand_lossage ("invalid operand for code '%c'", code);
16258 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16261 /* Print an iWMMXt control register name. */
16263 if (GET_CODE (x) != CONST_INT
16265 || INTVAL (x) >= 16)
16266 /* Bad value for wC register number. */
16268 output_operand_lossage ("invalid operand for code '%c'", code);
16274 static const char * wc_reg_names [16] =
16276 "wCID", "wCon", "wCSSF", "wCASF",
16277 "wC4", "wC5", "wC6", "wC7",
16278 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16279 "wC12", "wC13", "wC14", "wC15"
16282 fputs (wc_reg_names [INTVAL (x)], stream);
16286 /* Print the high single-precision register of a VFP double-precision register.  */
16290 int mode = GET_MODE (x);
16293 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16295 output_operand_lossage ("invalid operand for code '%c'", code);
16300 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16302 output_operand_lossage ("invalid operand for code '%c'", code);
16306 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16310 /* Print a VFP/Neon double precision or quad precision register name. */
16314 int mode = GET_MODE (x);
16315 int is_quad = (code == 'q');
16318 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16320 output_operand_lossage ("invalid operand for code '%c'", code);
16324 if (GET_CODE (x) != REG
16325 || !IS_VFP_REGNUM (REGNO (x)))
16327 output_operand_lossage ("invalid operand for code '%c'", code);
16332 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16333 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16335 output_operand_lossage ("invalid operand for code '%c'", code);
16339 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16340 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
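/* Example: the double-precision register starting at s8 prints as
   "d4", and the quad-precision register covering s8-s11 as "q2".  */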
16344 /* These two codes print the low/high doubleword register of a Neon quad
16345 register, respectively. For pair-structure types, can also print
16346 low/high quadword registers. */
16350 int mode = GET_MODE (x);
16353 if ((GET_MODE_SIZE (mode) != 16
16354 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16356 output_operand_lossage ("invalid operand for code '%c'", code);
16361 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16363 output_operand_lossage ("invalid operand for code '%c'", code);
16367 if (GET_MODE_SIZE (mode) == 16)
16368 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16369 + (code == 'f' ? 1 : 0));
16371 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16372 + (code == 'f' ? 1 : 0));
16376 /* Print a VFPv3 floating-point constant, represented as an integer index.  */
16380 int index = vfp3_const_double_index (x);
16381 gcc_assert (index != -1);
16382 fprintf (stream, "%d", index);
16386 /* Print bits representing opcode features for Neon.
16388 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16389 and polynomials as unsigned.
16391 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16393 Bit 2 is 1 for rounding functions, 0 otherwise. */
16395 /* Identify the type as 's', 'u', 'p' or 'f'. */
16398 HOST_WIDE_INT bits = INTVAL (x);
16399 fputc ("uspf"[bits & 3], stream);
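/* e.g. bits == 3 sets both the "signed" and "float/poly" bits, so
   'T' selects 'f' -- the type letter used in suffixes such as
   ".f32".  */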
16403 /* Likewise, but signed and unsigned integers are both 'i'. */
16406 HOST_WIDE_INT bits = INTVAL (x);
16407 fputc ("iipf"[bits & 3], stream);
16411 /* As for 'T', but emit 'u' instead of 'p'. */
16414 HOST_WIDE_INT bits = INTVAL (x);
16415 fputc ("usuf"[bits & 3], stream);
16419 /* Bit 2: rounding (vs none). */
16422 HOST_WIDE_INT bits = INTVAL (x);
16423 fputs ((bits & 4) != 0 ? "r" : "", stream);
16427 /* Memory operand for vld1/vst1 instruction. */
16431 bool postinc = FALSE;
16432 unsigned align, modesize, align_bits;
16434 gcc_assert (GET_CODE (x) == MEM);
16435 addr = XEXP (x, 0);
16436 if (GET_CODE (addr) == POST_INC)
16439 addr = XEXP (addr, 0);
16441 asm_fprintf (stream, "[%r", REGNO (addr));
16443 /* We know the alignment of this access, so we can emit a hint in the
16444 instruction (for some alignments) as an aid to the memory subsystem of the target.  */
16446 align = MEM_ALIGN (x) >> 3;
16447 modesize = GET_MODE_SIZE (GET_MODE (x));
16449 /* Only certain alignment specifiers are supported by the hardware. */
16450 if (modesize == 16 && (align % 32) == 0)
16452 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16454 else if ((align % 8) == 0)
16459 if (align_bits != 0)
16460 asm_fprintf (stream, ":%d", align_bits);
16462 asm_fprintf (stream, "]");
16465 fputs ("!", stream);
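/* So, assuming align_bits of 128, a post-incremented access prints
   as "[r0:128]!", and an unhinted one simply as "[r0]".  */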
16473 gcc_assert (GET_CODE (x) == MEM);
16474 addr = XEXP (x, 0);
16475 gcc_assert (GET_CODE (addr) == REG);
16476 asm_fprintf (stream, "[%r]", REGNO (addr));
16480 /* Translate an S register number into a D register number and element index. */
16483 int mode = GET_MODE (x);
16486 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16488 output_operand_lossage ("invalid operand for code '%c'", code);
16493 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16495 output_operand_lossage ("invalid operand for code '%c'", code);
16499 regno = regno - FIRST_VFP_REGNUM;
16500 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16504 /* Register specifier for vld1.16/vst1.16. Translate the S register
16505 number into a D register number and element index. */
16508 int mode = GET_MODE (x);
16511 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16513 output_operand_lossage ("invalid operand for code '%c'", code);
16518 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16520 output_operand_lossage ("invalid operand for code '%c'", code);
16524 regno = regno - FIRST_VFP_REGNUM;
16525 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
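/* Worked example: for s1 (the odd half of d0), the generic form
   above prints "d0[1]", while this vld1.16/vst1.16 form prints
   "d0[2]", since the 16-bit lane index doubles for the odd S
   register.  */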
16532 output_operand_lossage ("missing operand");
16536 switch (GET_CODE (x))
16539 asm_fprintf (stream, "%r", REGNO (x));
16543 output_memory_reference_mode = GET_MODE (x);
16544 output_address (XEXP (x, 0));
16551 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16552 sizeof (fpstr), 0, 1);
16553 fprintf (stream, "#%s", fpstr);
16556 fprintf (stream, "#%s", fp_immediate_constant (x));
16560 gcc_assert (GET_CODE (x) != NEG);
16561 fputc ('#', stream);
16562 if (GET_CODE (x) == HIGH)
16564 fputs (":lower16:", stream);
16568 output_addr_const (stream, x);
16574 /* Target hook for printing a memory address. */
16576 arm_print_operand_address (FILE *stream, rtx x)
16580 int is_minus = GET_CODE (x) == MINUS;
16582 if (GET_CODE (x) == REG)
16583 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16584 else if (GET_CODE (x) == PLUS || is_minus)
16586 rtx base = XEXP (x, 0);
16587 rtx index = XEXP (x, 1);
16588 HOST_WIDE_INT offset = 0;
16589 if (GET_CODE (base) != REG
16590 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16592 /* Ensure that BASE is a register. */
16593 /* (one of them must be). */
16594 /* Also ensure the SP is not used as an index register.  */
16599 switch (GET_CODE (index))
16602 offset = INTVAL (index);
16605 asm_fprintf (stream, "[%r, #%wd]",
16606 REGNO (base), offset);
16610 asm_fprintf (stream, "[%r, %s%r]",
16611 REGNO (base), is_minus ? "-" : "",
16621 asm_fprintf (stream, "[%r, %s%r",
16622 REGNO (base), is_minus ? "-" : "",
16623 REGNO (XEXP (index, 0)));
16624 arm_print_operand (stream, index, 'S');
16625 fputs ("]", stream);
16630 gcc_unreachable ();
16633 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16634 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16636 extern enum machine_mode output_memory_reference_mode;
16638 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16640 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16641 asm_fprintf (stream, "[%r, #%s%d]!",
16642 REGNO (XEXP (x, 0)),
16643 GET_CODE (x) == PRE_DEC ? "-" : "",
16644 GET_MODE_SIZE (output_memory_reference_mode));
16646 asm_fprintf (stream, "[%r], #%s%d",
16647 REGNO (XEXP (x, 0)),
16648 GET_CODE (x) == POST_DEC ? "-" : "",
16649 GET_MODE_SIZE (output_memory_reference_mode));
16651 else if (GET_CODE (x) == PRE_MODIFY)
16653 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16654 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16655 asm_fprintf (stream, "#%wd]!",
16656 INTVAL (XEXP (XEXP (x, 1), 1)));
16658 asm_fprintf (stream, "%r]!",
16659 REGNO (XEXP (XEXP (x, 1), 1)));
16661 else if (GET_CODE (x) == POST_MODIFY)
16663 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16664 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16665 asm_fprintf (stream, "#%wd",
16666 INTVAL (XEXP (XEXP (x, 1), 1)));
16668 asm_fprintf (stream, "%r",
16669 REGNO (XEXP (XEXP (x, 1), 1)));
16671 else
output_addr_const (stream, x);
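/* The writeback forms above print as, e.g., "[r0, #8]!" for a
   PRE_MODIFY that adds 8 to r0, and "[r0], #8" for the
   corresponding POST_MODIFY.  */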
16675 if (GET_CODE (x) == REG)
16676 asm_fprintf (stream, "[%r]", REGNO (x));
16677 else if (GET_CODE (x) == POST_INC)
16678 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16679 else if (GET_CODE (x) == PLUS)
16681 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16682 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16683 asm_fprintf (stream, "[%r, #%wd]",
16684 REGNO (XEXP (x, 0)),
16685 INTVAL (XEXP (x, 1)));
16687 asm_fprintf (stream, "[%r, %r]",
16688 REGNO (XEXP (x, 0)),
16689 REGNO (XEXP (x, 1)));
16692 output_addr_const (stream, x);
16696 /* Target hook for indicating whether a punctuation character for
16697 TARGET_PRINT_OPERAND is valid. */
16699 arm_print_operand_punct_valid_p (unsigned char code)
16701 return (code == '@' || code == '|' || code == '.'
16702 || code == '(' || code == ')' || code == '#'
16703 || (TARGET_32BIT && (code == '?'))
16704 || (TARGET_THUMB2 && (code == '!'))
16705 || (TARGET_THUMB && (code == '_')));
16708 /* Target hook for assembling integer objects. The ARM version needs to
16709 handle word-sized values specially. */
16711 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16713 enum machine_mode mode;
16715 if (size == UNITS_PER_WORD && aligned_p)
16717 fputs ("\t.word\t", asm_out_file);
16718 output_addr_const (asm_out_file, x);
16720 /* Mark symbols as position independent. We only do this in the
16721 .text segment, not in the .data segment. */
16722 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16723 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16725 /* See legitimize_pic_address for an explanation of the
16726 TARGET_VXWORKS_RTP check. */
16727 if (TARGET_VXWORKS_RTP
16728 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16729 fputs ("(GOT)", asm_out_file);
16731 fputs ("(GOTOFF)", asm_out_file);
16733 fputc ('\n', asm_out_file);
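/* So a PIC reference from the constant pool to a global symbol
   "foo" (an illustrative name) is emitted as ".word foo(GOT)",
   and a reference to a local symbol as ".word foo(GOTOFF)".  */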
16737 mode = GET_MODE (x);
16739 if (arm_vector_mode_supported_p (mode))
16743 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16745 units = CONST_VECTOR_NUNITS (x);
16746 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16748 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16749 for (i = 0; i < units; i++)
16751 rtx elt = CONST_VECTOR_ELT (x, i);
16753 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16756 for (i = 0; i < units; i++)
16758 rtx elt = CONST_VECTOR_ELT (x, i);
16759 REAL_VALUE_TYPE rval;
16761 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16764 (rval, GET_MODE_INNER (mode),
16765 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16771 return default_assemble_integer (x, size, aligned_p);
16775 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16779 if (!TARGET_AAPCS_BASED)
16782 default_named_section_asm_out_constructor
16783 : default_named_section_asm_out_destructor) (symbol, priority);
16787 /* Put these in the .init_array section, using a special relocation. */
16788 if (priority != DEFAULT_INIT_PRIORITY)
16791 sprintf (buf, "%s.%.5u",
16792 is_ctor ? ".init_array" : ".fini_array",
16794 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16801 switch_to_section (s);
16802 assemble_align (POINTER_SIZE);
16803 fputs ("\t.word\t", asm_out_file);
16804 output_addr_const (asm_out_file, symbol);
16805 fputs ("(target1)\n", asm_out_file);
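/* For instance, a constructor with priority 100 lands in section
   ".init_array.00100" (the "%s.%.5u" format above) and is emitted
   as ".word <symbol>(target1)".  */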
16808 /* Add a function to the list of static constructors. */
16811 arm_elf_asm_constructor (rtx symbol, int priority)
16813 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16816 /* Add a function to the list of static destructors. */
16819 arm_elf_asm_destructor (rtx symbol, int priority)
16821 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16824 /* A finite state machine takes care of noticing whether or not instructions
16825 can be conditionally executed, and thus decreases execution time and code
16826 size by deleting branch instructions. The fsm is controlled by
16827 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16829 /* The state of the fsm controlling condition codes is:
16830 0: normal, do nothing special
16831 1: make ASM_OUTPUT_OPCODE not output this instruction
16832 2: make ASM_OUTPUT_OPCODE not output this instruction
16833 3: make instructions conditional
16834 4: make instructions conditional
16836 State transitions (state->state by whom under condition):
16837 0 -> 1 final_prescan_insn if the `target' is a label
16838 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16839 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16840 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16841 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16842 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16843 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16844 (the target insn is arm_target_insn).
16846 If the jump clobbers the conditions then we use states 2 and 4.
16848 A similar thing can be done with conditional return insns.
16850 XXX In case the `target' is an unconditional branch, this conditionalising
16851 of the instructions always reduces code size, but not always execution
16852 time. But then, I want to reduce the code size to somewhere near what
16853 /bin/cc produces. */
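/* As an illustration (hypothetical input), the fsm turns

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   executing the skipped insn under the inverse condition instead of
   branching around it.  */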
16855 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16856 instructions. When a COND_EXEC instruction is seen the subsequent
16857 instructions are scanned so that multiple conditional instructions can be
16858 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16859 specify the length and true/false mask for the IT block. These will be
16860 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
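/* Example: three conditional insns, the second using the inverse
   condition, give arm_condexec_mask == 0x5 and
   arm_condexec_masklen == 3, which thumb2_asm_output_opcode below
   prints as an "itet <cond>" prefix.  */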
16862 /* Returns the index of the ARM condition code string in
16863 `arm_condition_codes'. COMPARISON should be an rtx like
16864 `(eq (...) (...))'. */
16865 static enum arm_cond_code
16866 get_arm_condition_code (rtx comparison)
16868 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16869 enum arm_cond_code code;
16870 enum rtx_code comp_code = GET_CODE (comparison);
16872 if (GET_MODE_CLASS (mode) != MODE_CC)
16873 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16874 XEXP (comparison, 1));
16878 case CC_DNEmode: code = ARM_NE; goto dominance;
16879 case CC_DEQmode: code = ARM_EQ; goto dominance;
16880 case CC_DGEmode: code = ARM_GE; goto dominance;
16881 case CC_DGTmode: code = ARM_GT; goto dominance;
16882 case CC_DLEmode: code = ARM_LE; goto dominance;
16883 case CC_DLTmode: code = ARM_LT; goto dominance;
16884 case CC_DGEUmode: code = ARM_CS; goto dominance;
16885 case CC_DGTUmode: code = ARM_HI; goto dominance;
16886 case CC_DLEUmode: code = ARM_LS; goto dominance;
16887 case CC_DLTUmode: code = ARM_CC;
16890 gcc_assert (comp_code == EQ || comp_code == NE);
16892 if (comp_code == EQ)
16893 return ARM_INVERSE_CONDITION_CODE (code);
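/* So, e.g., an NE comparison in CC_DNEmode yields ARM_NE, while an
   EQ comparison in the same mode yields its inverse, ARM_EQ.  */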
16899 case NE: return ARM_NE;
16900 case EQ: return ARM_EQ;
16901 case GE: return ARM_PL;
16902 case LT: return ARM_MI;
16903 default: gcc_unreachable ();
16909 case NE: return ARM_NE;
16910 case EQ: return ARM_EQ;
16911 default: gcc_unreachable ();
16917 case NE: return ARM_MI;
16918 case EQ: return ARM_PL;
16919 default: gcc_unreachable ();
16924 /* These encodings assume that AC=1 in the FPA system control
16925 byte.  This allows us to handle all cases except UNEQ and LTGT.  */
16929 case GE: return ARM_GE;
16930 case GT: return ARM_GT;
16931 case LE: return ARM_LS;
16932 case LT: return ARM_MI;
16933 case NE: return ARM_NE;
16934 case EQ: return ARM_EQ;
16935 case ORDERED: return ARM_VC;
16936 case UNORDERED: return ARM_VS;
16937 case UNLT: return ARM_LT;
16938 case UNLE: return ARM_LE;
16939 case UNGT: return ARM_HI;
16940 case UNGE: return ARM_PL;
16941 /* UNEQ and LTGT do not have a representation. */
16942 case UNEQ: /* Fall through. */
16943 case LTGT: /* Fall through. */
16944 default: gcc_unreachable ();
16950 case NE: return ARM_NE;
16951 case EQ: return ARM_EQ;
16952 case GE: return ARM_LE;
16953 case GT: return ARM_LT;
16954 case LE: return ARM_GE;
16955 case LT: return ARM_GT;
16956 case GEU: return ARM_LS;
16957 case GTU: return ARM_CC;
16958 case LEU: return ARM_CS;
16959 case LTU: return ARM_HI;
16960 default: gcc_unreachable ();
16966 case LTU: return ARM_CS;
16967 case GEU: return ARM_CC;
16968 default: gcc_unreachable ();
16974 case NE: return ARM_NE;
16975 case EQ: return ARM_EQ;
16976 case GEU: return ARM_CS;
16977 case GTU: return ARM_HI;
16978 case LEU: return ARM_LS;
16979 case LTU: return ARM_CC;
16980 default: gcc_unreachable ();
16986 case GE: return ARM_GE;
16987 case LT: return ARM_LT;
16988 case GEU: return ARM_CS;
16989 case LTU: return ARM_CC;
16990 default: gcc_unreachable ();
16996 case NE: return ARM_NE;
16997 case EQ: return ARM_EQ;
16998 case GE: return ARM_GE;
16999 case GT: return ARM_GT;
17000 case LE: return ARM_LE;
17001 case LT: return ARM_LT;
17002 case GEU: return ARM_CS;
17003 case GTU: return ARM_HI;
17004 case LEU: return ARM_LS;
17005 case LTU: return ARM_CC;
17006 default: gcc_unreachable ();
17009 default: gcc_unreachable ();
17013 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions.  */
17016 thumb2_final_prescan_insn (rtx insn)
17018 rtx first_insn = insn;
17019 rtx body = PATTERN (insn);
17021 enum arm_cond_code code;
17025 /* Remove the previous insn from the count of insns to be output. */
17026 if (arm_condexec_count)
17027 arm_condexec_count--;
17029 /* Nothing to do if we are already inside a conditional block. */
17030 if (arm_condexec_count)
17033 if (GET_CODE (body) != COND_EXEC)
17036 /* Conditional jumps are implemented directly. */
17037 if (GET_CODE (insn) == JUMP_INSN)
17040 predicate = COND_EXEC_TEST (body);
17041 arm_current_cc = get_arm_condition_code (predicate);
17043 n = get_attr_ce_count (insn);
17044 arm_condexec_count = 1;
17045 arm_condexec_mask = (1 << n) - 1;
17046 arm_condexec_masklen = n;
17047 /* See if subsequent instructions can be combined into the same block. */
17050 insn = next_nonnote_insn (insn);
17052 /* Jumping into the middle of an IT block is illegal, so a label or
17053 barrier terminates the block. */
17054 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
17057 body = PATTERN (insn);
17058 /* USE and CLOBBER aren't really insns, so just skip them. */
17059 if (GET_CODE (body) == USE
17060 || GET_CODE (body) == CLOBBER)
17063 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17064 if (GET_CODE (body) != COND_EXEC)
17066 /* Allow up to 4 conditionally executed instructions in a block. */
17067 n = get_attr_ce_count (insn);
17068 if (arm_condexec_masklen + n > 4)
17071 predicate = COND_EXEC_TEST (body);
17072 code = get_arm_condition_code (predicate);
17073 mask = (1 << n) - 1;
17074 if (arm_current_cc == code)
17075 arm_condexec_mask |= (mask << arm_condexec_masklen);
17076 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
17079 arm_condexec_count++;
17080 arm_condexec_masklen += n;
17082 /* A jump must be the last instruction in a conditional block. */
17083 if (GET_CODE (insn) == JUMP_INSN)
17086 /* Restore recog_data (getting the attributes of other insns can
17087 destroy this array, but final.c assumes that it remains intact
17088 across this call). */
17089 extract_constrain_insn_cached (first_insn);
17093 arm_final_prescan_insn (rtx insn)
17095 /* BODY will hold the body of INSN. */
17096 rtx body = PATTERN (insn);
17098 /* This will be 1 if trying to repeat the trick, and things need to be
17099 reversed if it appears to fail. */
17102 /* If we start with a return insn, we only succeed if we find another one. */
17103 int seeking_return = 0;
17105 /* START_INSN will hold the insn from where we start looking. This is the
17106 first insn after the following code_label if REVERSE is true. */
17107 rtx start_insn = insn;
17109 /* If in state 4, check if the target branch is reached, in order to
17110 change back to state 0. */
17111 if (arm_ccfsm_state == 4)
17113 if (insn == arm_target_insn)
17115 arm_target_insn = NULL;
17116 arm_ccfsm_state = 0;
17121 /* If in state 3, it is possible to repeat the trick, if this insn is an
17122 unconditional branch to a label, and immediately following this branch
17123 is the previous target label which is only used once, and the label this
17124 branch jumps to is not too far off. */
17125 if (arm_ccfsm_state == 3)
17127 if (simplejump_p (insn))
17129 start_insn = next_nonnote_insn (start_insn);
17130 if (GET_CODE (start_insn) == BARRIER)
17132 /* XXX Isn't this always a barrier? */
17133 start_insn = next_nonnote_insn (start_insn);
17135 if (GET_CODE (start_insn) == CODE_LABEL
17136 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17137 && LABEL_NUSES (start_insn) == 1)
17142 else if (GET_CODE (body) == RETURN)
17144 start_insn = next_nonnote_insn (start_insn);
17145 if (GET_CODE (start_insn) == BARRIER)
17146 start_insn = next_nonnote_insn (start_insn);
17147 if (GET_CODE (start_insn) == CODE_LABEL
17148 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17149 && LABEL_NUSES (start_insn) == 1)
17152 seeking_return = 1;
17161 gcc_assert (!arm_ccfsm_state || reverse);
17162 if (GET_CODE (insn) != JUMP_INSN)
17165 /* This jump might be paralleled with a clobber of the condition codes;
17166 the jump should always come first.  */
17167 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17168 body = XVECEXP (body, 0, 0);
17171 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17172 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17175 int fail = FALSE, succeed = FALSE;
17176 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17177 int then_not_else = TRUE;
17178 rtx this_insn = start_insn, label = 0;
17180 /* Register the insn jumped to. */
17183 if (!seeking_return)
17184 label = XEXP (SET_SRC (body), 0);
17186 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17187 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17188 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17190 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17191 then_not_else = FALSE;
17193 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17194 seeking_return = 1;
17195 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17197 seeking_return = 1;
17198 then_not_else = FALSE;
17201 gcc_unreachable ();
17203 /* See how many insns this branch skips, and what kind of insns. If all
17204 insns are okay, and the label or unconditional branch to the same
17205 label is not too far away, succeed. */
17206 for (insns_skipped = 0;
17207 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17211 this_insn = next_nonnote_insn (this_insn);
17215 switch (GET_CODE (this_insn))
17218 /* Succeed if it is the target label, otherwise fail since
17219 control falls in from somewhere else. */
17220 if (this_insn == label)
17222 arm_ccfsm_state = 1;
17230 /* Succeed if the following insn is the target label.
17232 If return insns are used then the last insn in a function
17233 will be a barrier. */
17234 this_insn = next_nonnote_insn (this_insn);
17235 if (this_insn && this_insn == label)
17237 arm_ccfsm_state = 1;
17245 /* The AAPCS says that conditional calls should not be
17246 used since they make interworking inefficient (the
17247 linker can't transform BL<cond> into BLX). That's
17248 only a problem if the machine has BLX. */
17255 /* Succeed if the following insn is the target label, or
17256 if the following two insns are a barrier and the target label.  */
17258 this_insn = next_nonnote_insn (this_insn);
17259 if (this_insn && GET_CODE (this_insn) == BARRIER)
17260 this_insn = next_nonnote_insn (this_insn);
17262 if (this_insn && this_insn == label
17263 && insns_skipped < max_insns_skipped)
17265 arm_ccfsm_state = 1;
17273 /* If this is an unconditional branch to the same label, succeed.
17274 If it is to another label, do nothing.  If it is conditional, fail.  */
17276 /* XXX Probably, the tests for SET and the PC are unnecessary.  */
17279 scanbody = PATTERN (this_insn);
17280 if (GET_CODE (scanbody) == SET
17281 && GET_CODE (SET_DEST (scanbody)) == PC)
17283 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17284 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17286 arm_ccfsm_state = 2;
17289 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17292 /* Fail if a conditional return is undesirable (e.g. on a
17293 StrongARM), but still allow this if optimizing for size. */
17294 else if (GET_CODE (scanbody) == RETURN
17295 && !use_return_insn (TRUE, NULL)
17298 else if (GET_CODE (scanbody) == RETURN
17301 arm_ccfsm_state = 2;
17304 else if (GET_CODE (scanbody) == PARALLEL)
17306 switch (get_attr_conds (this_insn))
17316 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17321 /* Instructions using or affecting the condition codes make it fail.  */
17323 scanbody = PATTERN (this_insn);
17324 if (!(GET_CODE (scanbody) == SET
17325 || GET_CODE (scanbody) == PARALLEL)
17326 || get_attr_conds (this_insn) != CONDS_NOCOND)
17329 /* A conditional cirrus instruction must be followed by
17330 a non-Cirrus instruction.  However, since we
17331 conditionalize instructions in this function and by
17332 the time we get here we can't add instructions
17333 (nops), because shorten_branches() has already been
17334 called, we will disable conditionalizing Cirrus
17335 instructions to be safe. */
17336 if (GET_CODE (scanbody) != USE
17337 && GET_CODE (scanbody) != CLOBBER
17338 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17348 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17349 arm_target_label = CODE_LABEL_NUMBER (label);
17352 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17354 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17356 this_insn = next_nonnote_insn (this_insn);
17357 gcc_assert (!this_insn
17358 || (GET_CODE (this_insn) != BARRIER
17359 && GET_CODE (this_insn) != CODE_LABEL));
17363 /* Oh, dear!  We ran off the end... give up.  */
17364 extract_constrain_insn_cached (insn);
17365 arm_ccfsm_state = 0;
17366 arm_target_insn = NULL;
17369 arm_target_insn = this_insn;
17372 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was.  */
17375 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17377 if (reverse || then_not_else)
17378 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17381 /* Restore recog_data (getting the attributes of other insns can
17382 destroy this array, but final.c assumes that it remains intact
17383 across this call).  */
17384 extract_constrain_insn_cached (insn);
17388 /* Output IT instructions. */
17390 thumb2_asm_output_opcode (FILE * stream)
17395 if (arm_condexec_mask)
17397 for (n = 0; n < arm_condexec_masklen; n++)
17398 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17400 asm_fprintf (stream, "i%s\t%s\n\t", buff,
17401 arm_condition_codes[arm_current_cc]);
17402 arm_condexec_mask = 0;
17406 /* Returns true if REGNO is a valid register
17407 for holding a quantity of mode MODE.  */
17409 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17411 if (GET_MODE_CLASS (mode) == MODE_CC)
17412 return (regno == CC_REGNUM
17413 || (TARGET_HARD_FLOAT && TARGET_VFP
17414 && regno == VFPCC_REGNUM));
17417 /* For the Thumb we only allow values bigger than SImode in
17418 registers 0 - 6, so that there is always a second low
17419 register available to hold the upper part of the value.
17420 We probably ought to ensure that the register is the
17421 start of an even numbered register pair. */
17422 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17424 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17425 && IS_CIRRUS_REGNUM (regno))
17426 /* We have outlawed SI values in Cirrus registers because they
17427 reside in the lower 32 bits, but SF values reside in the
17428 upper 32 bits. This causes gcc all sorts of grief. We can't
17429 even split the registers into pairs because Cirrus SI values
17430 get sign extended to 64bits-- aldyh. */
17431 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17433 if (TARGET_HARD_FLOAT && TARGET_VFP
17434 && IS_VFP_REGNUM (regno))
17436 if (mode == SFmode || mode == SImode)
17437 return VFP_REGNO_OK_FOR_SINGLE (regno);
17439 if (mode == DFmode)
17440 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17442 /* VFP registers can hold HFmode values, but there is no point in
17443 putting them there unless we have hardware conversion insns. */
17444 if (mode == HFmode)
17445 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17448 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17449 || (VALID_NEON_QREG_MODE (mode)
17450 && NEON_REGNO_OK_FOR_QUAD (regno))
17451 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17452 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17453 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17454 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17455 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17460 if (TARGET_REALLY_IWMMXT)
17462 if (IS_IWMMXT_GR_REGNUM (regno))
17463 return mode == SImode;
17465 if (IS_IWMMXT_REGNUM (regno))
17466 return VALID_IWMMXT_REG_MODE (mode);
17469 /* We allow almost any value to be stored in the general registers.
17470 Restrict doubleword quantities to even register pairs so that we can
17471 use ldrd. Do not allow very large Neon structure opaque modes in
17472 general registers; they would use too many. */
17473 if (regno <= LAST_ARM_REGNUM)
17474 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17475 && ARM_NUM_REGS (mode) <= 4;
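/* E.g. when TARGET_LDRD is set, a DImode value may live in {r0, r1}
   (even base register) but not in {r1, r2}; and a four-word value is
   the largest allowed in core registers.  */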
17477 if (regno == FRAME_POINTER_REGNUM
17478 || regno == ARG_POINTER_REGNUM)
17479 /* We only allow integers in the fake hard registers. */
17480 return GET_MODE_CLASS (mode) == MODE_INT;
17482 /* The only registers left are the FPA registers
17483 which we only allow to hold FP values. */
17484 return (TARGET_HARD_FLOAT && TARGET_FPA
17485 && GET_MODE_CLASS (mode) == MODE_FLOAT
17486 && regno >= FIRST_FPA_REGNUM
17487 && regno <= LAST_FPA_REGNUM);
17490 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17491 not used in arm mode. */
17494 arm_regno_class (int regno)
17498 if (regno == STACK_POINTER_REGNUM)
17500 if (regno == CC_REGNUM)
17507 if (TARGET_THUMB2 && regno < 8)
17510 if ( regno <= LAST_ARM_REGNUM
17511 || regno == FRAME_POINTER_REGNUM
17512 || regno == ARG_POINTER_REGNUM)
17513 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17515 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17516 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17518 if (IS_CIRRUS_REGNUM (regno))
17519 return CIRRUS_REGS;
17521 if (IS_VFP_REGNUM (regno))
17523 if (regno <= D7_VFP_REGNUM)
17524 return VFP_D0_D7_REGS;
17525 else if (regno <= LAST_LO_VFP_REGNUM)
17526 return VFP_LO_REGS;
17528 return VFP_HI_REGS;
17531 if (IS_IWMMXT_REGNUM (regno))
17532 return IWMMXT_REGS;
17534 if (IS_IWMMXT_GR_REGNUM (regno))
17535 return IWMMXT_GR_REGS;
17540 /* Handle a special case when computing the offset
17541 of an argument from the frame pointer. */
17543 arm_debugger_arg_offset (int value, rtx addr)
17547 /* We are only interested if dbxout_parms() failed to compute the offset. */
17551 /* We can only cope with the case where the address is held in a register. */
17552 if (GET_CODE (addr) != REG)
17555 /* If we are using the frame pointer to point at the argument, then
17556 an offset of 0 is correct. */
17557 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17560 /* If we are using the stack pointer to point at the
17561 argument, then an offset of 0 is correct. */
17562 /* ??? Check this is consistent with thumb2 frame layout. */
17563 if ((TARGET_THUMB || !frame_pointer_needed)
17564 && REGNO (addr) == SP_REGNUM)
17567 /* Oh dear. The argument is pointed to by a register rather
17568 than being held in a register, or being stored at a known
17569 offset from the frame pointer. Since GDB only understands
17570 those two kinds of argument we must translate the address
17571 held in the register into an offset from the frame pointer.
17572 We do this by searching through the insns for the function
17573 looking to see where this register gets its value. If the
17574 register is initialized from the frame pointer plus an offset
17575 then we are in luck and we can continue, otherwise we give up.
17577 This code is exercised by producing debugging information
17578 for a function with arguments like this:
17580 double func (double a, double b, int c, double d) {return d;}
17582 Without this code the stab for parameter 'd' will be set to
17583 an offset of 0 from the frame pointer, rather than 8. */
17585 /* The if() statement says:
17587 If the insn is a normal instruction
17588 and if the insn is setting the value in a register
17589 and if the register being set is the register holding the address of the argument
17590 and if the address is computed by an addition
17591 that involves adding to a register
17592 which is the frame pointer
17597 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17599 if ( GET_CODE (insn) == INSN
17600 && GET_CODE (PATTERN (insn)) == SET
17601 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17602 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17603 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17604 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17605 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17608 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17617 warning (0, "unable to compute real location of stacked parameter");
17618 value = 8; /* XXX magic hack */
17624 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17627 if ((MASK) & insn_flags) \
17628 add_builtin_function ((NAME), (TYPE), (CODE), \
17629 BUILT_IN_MD, NULL, NULL_TREE); \
17633 struct builtin_description
17635 const unsigned int mask;
17636 const enum insn_code icode;
17637 const char * const name;
17638 const enum arm_builtins code;
17639 const enum rtx_code comparison;
17640 const unsigned int flag;
17643 static const struct builtin_description bdesc_2arg[] =
17645 #define IWMMXT_BUILTIN(code, string, builtin) \
17646 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17647 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17649 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17650 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17651 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17652 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17653 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17654 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17655 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17656 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17657 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17658 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17659 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17660 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17661 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17662 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17663 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17664 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17665 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17666 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17667 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17668 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17669 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17670 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17671 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17672 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17673 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17674 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17675 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17676 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17677 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17678 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17679 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17680 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17681 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17682 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17683 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17684 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17685 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17686 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17687 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17688 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17689 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17690 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17691 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17692 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17693 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17694 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17695 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17696 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17697 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17698 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17699 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17700 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17701 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17702 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17703 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17704 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17705 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17706 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17708 #define IWMMXT_BUILTIN2(code, builtin) \
17709 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17711 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17712 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17713 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17714 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17715 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17716 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17717 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17718 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17719 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17720 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17721 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17722 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17723 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17724 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17725 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17726 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17727 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17728 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17729 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17730 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17731 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17732 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17733 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17734 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17735 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17736 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17737 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17738 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17739 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17740 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17741 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17742 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17745 static const struct builtin_description bdesc_1arg[] =
17747 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17748 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17749 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17750 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17751 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17752 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17753 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17754 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17755 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17756 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17757 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17758 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17759 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17760 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17761 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17762 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17763 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17764 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17767 /* Set up all the iWMMXt builtins. This is
17768 not called if TARGET_IWMMXT is zero. */
17771 arm_init_iwmmxt_builtins (void)
17773 const struct builtin_description * d;
17775 tree endlink = void_list_node;
17777 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17778 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17779 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17782 = build_function_type (integer_type_node,
17783 tree_cons (NULL_TREE, integer_type_node, endlink));
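/* The chain above builds the function type "int (int)"; each
   tree_cons link adds one argument, with endlink terminating the
   list.  The same pattern builds every type below.  */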
17784 tree v8qi_ftype_v8qi_v8qi_int
17785 = build_function_type (V8QI_type_node,
17786 tree_cons (NULL_TREE, V8QI_type_node,
17787 tree_cons (NULL_TREE, V8QI_type_node,
17788 tree_cons (NULL_TREE,
17791 tree v4hi_ftype_v4hi_int
17792 = build_function_type (V4HI_type_node,
17793 tree_cons (NULL_TREE, V4HI_type_node,
17794 tree_cons (NULL_TREE, integer_type_node,
17796 tree v2si_ftype_v2si_int
17797 = build_function_type (V2SI_type_node,
17798 tree_cons (NULL_TREE, V2SI_type_node,
17799 tree_cons (NULL_TREE, integer_type_node,
17801 tree v2si_ftype_di_di
17802 = build_function_type (V2SI_type_node,
17803 tree_cons (NULL_TREE, long_long_integer_type_node,
17804 tree_cons (NULL_TREE, long_long_integer_type_node,
17806 tree di_ftype_di_int
17807 = build_function_type (long_long_integer_type_node,
17808 tree_cons (NULL_TREE, long_long_integer_type_node,
17809 tree_cons (NULL_TREE, integer_type_node,
17811 tree di_ftype_di_int_int
17812 = build_function_type (long_long_integer_type_node,
17813 tree_cons (NULL_TREE, long_long_integer_type_node,
17814 tree_cons (NULL_TREE, integer_type_node,
17815 tree_cons (NULL_TREE,
17818 tree int_ftype_v8qi
17819 = build_function_type (integer_type_node,
17820 tree_cons (NULL_TREE, V8QI_type_node,
17822 tree int_ftype_v4hi
17823 = build_function_type (integer_type_node,
17824 tree_cons (NULL_TREE, V4HI_type_node,
17826 tree int_ftype_v2si
17827 = build_function_type (integer_type_node,
17828 tree_cons (NULL_TREE, V2SI_type_node,
17830 tree int_ftype_v8qi_int
17831 = build_function_type (integer_type_node,
17832 tree_cons (NULL_TREE, V8QI_type_node,
17833 tree_cons (NULL_TREE, integer_type_node,
17835 tree int_ftype_v4hi_int
17836 = build_function_type (integer_type_node,
17837 tree_cons (NULL_TREE, V4HI_type_node,
17838 tree_cons (NULL_TREE, integer_type_node,
17840 tree int_ftype_v2si_int
17841 = build_function_type (integer_type_node,
17842 tree_cons (NULL_TREE, V2SI_type_node,
17843 tree_cons (NULL_TREE, integer_type_node,
17845 tree v8qi_ftype_v8qi_int_int
17846 = build_function_type (V8QI_type_node,
17847 tree_cons (NULL_TREE, V8QI_type_node,
17848 tree_cons (NULL_TREE, integer_type_node,
17849 tree_cons (NULL_TREE,
17852 tree v4hi_ftype_v4hi_int_int
17853 = build_function_type (V4HI_type_node,
17854 tree_cons (NULL_TREE, V4HI_type_node,
17855 tree_cons (NULL_TREE, integer_type_node,
17856 tree_cons (NULL_TREE,
17859 tree v2si_ftype_v2si_int_int
17860 = build_function_type (V2SI_type_node,
17861 tree_cons (NULL_TREE, V2SI_type_node,
17862 tree_cons (NULL_TREE, integer_type_node,
17863 tree_cons (NULL_TREE,
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 tree_cons (NULL_TREE,
							    V4HI_type_node,
							    endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
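
  /* For illustration (a sketch, not an addition to the tables): a
     bdesc_2arg entry such as IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB),
     whose insn has operand 1 in V8QImode, is registered by the loop above
     under the "__builtin_arm_waddb" name stored in the table, with type
     v8qi_ftype_v8qi_v8qi.  */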
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
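
/* Usage sketch (hypothetical user code; such calls are normally written
   through the mmintrin.h wrappers rather than made directly):

     long long acc = __builtin_arm_wzero ();      // di_ftype_void
     acc = __builtin_arm_wsrldi (acc, 8);         // di_ftype_di_int

   Calls like these reach arm_expand_builtin further down in this file,
   which maps them onto the corresponding iWMMXt instructions.  */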
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
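
/* Usage sketch (hypothetical user code): the builtin registered above can
   be called directly to obtain the current thread pointer,

     void *tp = __builtin_thread_pointer ();

   and typically expands to either a CP15 coprocessor read or a call to
   the __aeabi_read_tp helper, depending on how the target accesses
   thread-local storage.  */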
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

#define T_MAX 13

typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
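
/* For illustration: a table entry written as { VAR2 (BINOP, vqdmull, v4hi,
   v2si) } expands, via the macros above, to

     { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 }

   i.e. one record describing both element-size variants of the insn.  */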
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};
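
/* For illustration: arm_init_neon_builtins below turns each record above
   into one function per listed mode, named __builtin_neon_<name><mode>;
   the vadd entry, for instance, yields __builtin_neon_vaddv8qi through
   __builtin_neon_vaddv2di.  The arm_neon.h intrinsics (vadd_s8 and
   friends) are thin wrappers around these internal names.  */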
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;
  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
	    case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT:
	    case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
	    case NEON_STORE1LANE:
	    case NEON_STORESTRUCT:
	    case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP:
	    case NEON_BINOP:
	    case NEON_LOGICBINOP:
	    case NEON_SHIFTINSERT:
	    case NEON_TERNOP:
	    case NEON_GETLANE:
	    case NEON_SETLANE:
	    case NEON_CREATE:
	    case NEON_DUP:
	    case NEON_DUPLANE:
	    case NEON_SHIFTIMM:
	    case NEON_SHIFTACC:
	    case NEON_COMBINE:
	    case NEON_SPLIT:
	    case NEON_CONVERT:
	    case NEON_FIXCONV:
	    case NEON_LANEMUL:
	    case NEON_LANEMULL:
	    case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL:
	    case NEON_SCALARMULL:
	    case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT:
	    case NEON_VTBL:
	    case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type () function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
			  case VOIDmode: eltype = void_type_node; break;
			  /* Scalars.  */
			  case QImode: eltype = neon_intQI_type_node; break;
			  case HImode: eltype = neon_intHI_type_node; break;
			  case SImode: eltype = neon_intSI_type_node; break;
			  case SFmode: eltype = neon_float_type_node; break;
			  case DImode: eltype = neon_intDI_type_node; break;
			  case TImode: eltype = intTI_type_node; break;
			  case EImode: eltype = intEI_type_node; break;
			  case OImode: eltype = intOI_type_node; break;
			  case CImode: eltype = intCI_type_node; break;
			  case XImode: eltype = intXI_type_node; break;
			  /* 64-bit vectors.  */
			  case V8QImode: eltype = V8QI_type_node; break;
			  case V4HImode: eltype = V4HI_type_node; break;
			  case V2SImode: eltype = V2SI_type_node; break;
			  case V2SFmode: eltype = V2SF_type_node; break;
			  /* 128-bit vectors.  */
			  case V16QImode: eltype = V16QI_type_node; break;
			  case V8HImode: eltype = V8HI_type_node; break;
			  case V4SImode: eltype = V4SI_type_node; break;
			  case V4SFmode: eltype = V4SF_type_node; break;
			  case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
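
/* Usage sketch (hypothetical user code): with an fp16 format selected
   (e.g. -mfp16-format=ieee), the type registered above becomes available
   to users as

     __fp16 coeff = 1.5;

   giving 16-bit storage while arithmetic is carried out in float, as the
   promotion and conversion hooks below arrange.  */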
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
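
/* For illustration (hypothetical snippet): given

     __fp16 h;
     double d = h;

   the hook above rewrites the widening as (double)(float)h, and narrows
   double to __fp16 symmetrically through float, as the half-precision
   semantics described above require.  */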
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Comparison function for bsearch over neon_builtin_data: a key matches a
   record if its function code falls within the record's code range.  */

static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key
    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
  neon_builtin_datum *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
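
/* For illustration: function codes are allocated densely, so the lookup
   above is pure arithmetic once bsearch finds the record.  If the vadd
   record had base_fcode B (a hypothetical value; real codes start at
   ARM_BUILTIN_NEON_BASE) and covered ten variants, fcode B + 2 would land
   in that record with idx 2, selecting the v2si variant, since variants
   are stored in neon_builtin_type_bits order.  */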
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  op[argc] = expand_normal (arg[argc]);
	  mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		    (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		    (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have
   symbolic constants defined per-instruction or per instruction-variant.
   Instead, the required info is looked up in the table neon_builtin_data.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */

void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
	if ((copied & (1 << i)) == 0)
	  {
	    int good = TRUE;

	    for (j = 0; good && j < count; j++)
	      if (i != j && (copied & (1 << j)) == 0
		  && reg_overlap_mentioned_p (src[j], dest[i]))
		good = FALSE;

	    if (good)
	      {
		operands[opctr++] = dest[i];
		operands[opctr++] = src[i];
		copied |= 1 << i;
	      }
	  }
    }

  gcc_assert (opctr == count * 2);
}
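
/* For illustration (hypothetical registers): with dest = {d0, d1} and
   src = {d1, d2}, emitting d1 <- d2 first would clobber d1 while it is
   still needed as the source of d0 <- d1.  The scan above therefore
   schedules d0 <- d1 first (no pending source overlaps d0) and d1 <- d2
   after it, producing operands = {d0, d1, d1, d2}.  */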
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;
    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
               : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
               : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
               : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
               : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
               : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
               : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
               : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
               : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0; (mask & (1 << bit)) == 0; ++bit)
    continue;

  return bit;
}
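/* For example, number_of_first_bit_set (0x18) == 3, since
   0x18 == 0b11000 and bit 3 is the lowest bit set.  */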
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
               unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
        {
          if (real_regs & (1 << regno))
            {
              if (real_regs & ((1 << regno) -1))
                fprintf (f, ", ");
              asm_fprintf (f, "%r", regno);
            }
        }
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");

          pushed_words++;
        }
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
        fprintf (f, ", ");

      asm_fprintf (f, "%r", LR_REGNUM);

      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
          || crtl->calls_eh_return)
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
        {
          if (pushed_mask & 1)
            dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
        }
    }
}
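/* Illustrative output (assumed, not taken from the original source):
   for MASK == (1 << 4) | (1 << 5) | (1 << LR_REGNUM), PUSH nonzero and
   target unwinding in use, the code above emits roughly

	.save	{r4, r5, lr}
	push	{r4, r5, lr}

   and, when frame debug info is wanted, advances the CFA by 12 bytes.  */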
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */
      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
      else
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
        {
          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
          else
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
        }
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required &= ~(required & - required))
    --pops_needed;
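/* Note: X & -X isolates the lowest set bit of X, so each iteration of
   the loop above pairs off one register to pop with one register to pop
   into.  E.g. available == 0b0110: 0b0110 & -0b0110 == 0b0010, and
   clearing that bit leaves 0b0100 for the next iteration.  */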
  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }
      else if (size > 12)
        {
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */
          restore_a4 = TRUE;

          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
        }

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
        {
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

          --pops_needed;
        }
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
        {
          int stack_pointer;

          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
        }
      else
        {
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);
        }
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            cfun->machine->thumb1_cc_insn = NULL_RTX;
        }
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
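/* Worked example: 0x000ff000 == 0xff << 12, so the loop above succeeds
   at i == 12 and the constant can be built with a move plus a shift,
   whereas 0x00100300 spans more than eight contiguous bits and is
   rejected.  */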
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
          /* Ignore tablejump patterns.  */
          && GET_CODE (PATTERN (insn)) != ADDR_VEC
          && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
          && get_attr_far_jump (insn) == FAR_JUMP_YES)
        {
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
          return 1;
        }
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
static int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      reg_base = arm_size_return_regs () / UNITS_PER_WORD;
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
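/* Worked example (illustrative): with amount == 516 and n_free == 2,
   516 >= 512 and 516 - 2*4 == 508 < 512, so the code above returns
   (516 - 508) / 4 == 2: popping two extra registers leaves a 508-byte
   adjustment that fits in a single Thumb immediate.  */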
/* The bits which aren't usefully expanded as rtl.  */
static int
thumb_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return 0;

  if (IS_NAKED (arm_current_func_type ()))
    return 0;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
        mask |= 1 << 3;
      if (size <= 8)
        mask |= 1 << 2;

      if (mask == 0)
        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      while (high_regs_pushed)
        {
          /* Find lo register(s) into which the high register(s) can
             be popped.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)
                break;
            }

          mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

          /* Pop the values into the low register(s).  */
          thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                {
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
                               regno);

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))
                      break;
                }
            }
        }
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
                       live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pushpop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
                       live_regs_mask);

      if (had_to_push_lr)
        {
          if (size > 12)
            {
              /* We have no free low regs, so save one.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
                           LAST_ARG_REGNUM);
            }

          /* Get the return address into a temporary register.  */
          thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
                         1 << LAST_ARG_REGNUM);

          if (size > 12)
            {
              /* Move the return address to lr.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
                           LAST_ARG_REGNUM);
              /* Restore the low register.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
                           IP_REGNUM);
              regno = LR_REGNUM;
            }
          else
            regno = LAST_ARG_REGNUM;
        }
      else
        regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return 0;
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the rest of a function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx insn, dwarf;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));
          RTX_FRAME_RELATED_P (insn) = 1;
          dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                               plus_constant (stack_pointer_rtx,
                                              -amount));
          RTX_FRAME_RELATED_P (dwarf) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
static void
thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;
  unsigned long live_regs_mask = 0;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  int cfa_offset = 0;
  int regno;

  if (IS_NAKED (arm_current_func_type ()))
    return;

  if (is_called_in_ARM_mode (current_function_decl))
    {
      const char * name;

      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
                  == SYMBOL_REF);
      name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      /* Generate code sequence to switch us into Thumb mode.  */
      /* The .code 32 directive has already been emitted by
         ASM_DECLARE_FUNCTION_NAME.  */
      asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

      /* Generate a label, so that the debugger will notice the
         change in instruction sets.  This label is also used by
         the assembler to bypass the ARM code when this function
         is called from a Thumb encoded function elsewhere in the
         same file.  Hence the definition of STUB_NAME here must
         agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

      fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
      if (arm_dllexport_name_p (name))
        name = arm_strip_name_encoding (name);
#endif
      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
      fprintf (f, "\t.thumb_func\n");
      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
    }

  if (crtl->args.pretend_args_size)
    {
      /* Output unwind directive for the stack adjustment.  */
      if (arm_except_unwind_info (&global_options) == UI_TARGET)
        fprintf (f, "\t.pad #%d\n",
                 crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
        {
          int num_pushes;

          fprintf (f, "\tpush\t{");

          num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);

          for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
               regno <= LAST_ARG_REGNUM;
               regno++)
            asm_fprintf (f, "%r%s", regno,
                         regno == LAST_ARG_REGNUM ? "" : ", ");

          fprintf (f, "}\n");
        }
      else
        asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
                     SP_REGNUM, SP_REGNUM,
                     crtl->args.pretend_args_size);

      /* We don't need to record the stores for unwinding (would it
         help the debugger any if we did?), but record the change in
         the stack pointer.  */
      if (dwarf2out_do_frame ())
        {
          char *l = dwarf2out_cfi_label (false);

          cfa_offset = cfa_offset + crtl->args.pretend_args_size;
          dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
        }
    }

  /* Get the registers we are going to push.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (TARGET_BACKTRACE)
    {
      unsigned offset;
      unsigned work_register;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

	  0   sub   SP, #16        Reserve space for 4 registers.
	  2   push  {R7}           Push low registers.
	  4   add   R7, SP, #20    Get the stack pointer before the push.
	  6   str   R7, [SP, #8]   Store the stack pointer (before reserving the space).
	  8   mov   R7, PC         Get hold of the start of this code plus 12.
	 10   str   R7, [SP, #16]  Store it.
	 12   mov   R7, FP         Get hold of the current frame pointer.
	 14   str   R7, [SP, #4]   Store it.
	 16   mov   R7, LR         Get hold of the current return address.
	 18   str   R7, [SP, #12]  Store it.
	 20   add   R7, SP, #16    Point at the start of the backtrace structure.
	 22   mov   FP, R7         Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);

      if (arm_except_unwind_info (&global_options) == UI_TARGET)
        asm_fprintf (f, "\t.pad #16\n");

      asm_fprintf
        (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
         SP_REGNUM, SP_REGNUM);

      if (dwarf2out_do_frame ())
        {
          char *l = dwarf2out_cfi_label (false);

          cfa_offset = cfa_offset + 16;
          dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
        }

      if (l_mask)
        {
          thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }
      else
        offset = 0;

      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
                   offset + 16 + crtl->args.pretend_args_size);

      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                   offset + 4);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      if (l_mask)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
          asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                       offset + 12);
          asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
                       ARM_HARD_FRAME_POINTER_REGNUM);
          asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                       offset);
        }
      else
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
                       ARM_HARD_FRAME_POINTER_REGNUM);
          asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                       offset);
          asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
          asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                       offset + 12);
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
                   offset + 8);
      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
                   offset + 12);
      asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      thumb_pushpop (f, mask, 1, &cfa_offset, mask);
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      pushable_regs = l_mask & 0xff;

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;

          for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
            {
              if (pushable_regs & (1 << regno))
                {
                  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);

                  high_regs_pushed --;
                  real_regs_mask |= (1 << next_hi_reg);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg --)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    {
                      pushable_regs &= ~((1 << regno) - 1);
                      break;
                    }
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (l_mask == (1 << LR_REGNUM))
            {
              thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
                             1, &cfa_offset,
                             real_regs_mask | (1 << LR_REGNUM));
              l_mask = 0;
            }
          else
            thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
        }
    }
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      if (REGNO (operands[5]) > REGNO (operands[6]))
        {
          tmp = operands[5];
          operands[5] = operands[6];
          operands[6] = tmp;
        }
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
                            reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
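/* For instance, if ARM_NAME_ENCODING_LENGTHS gives '*' a skip of 1, a
   (hypothetical) encoded name "*_foo" is emitted verbatim as "_foo",
   while a plain "foo" receives the usual %U user-label prefix.  */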
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
        asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
      else
        asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);

      if (TARGET_SOFT_FLOAT)
        {
          if (TARGET_VFP)
            fpu_name = "softvfp";
          else
            fpu_name = "softfpa";
        }
      else
        {
          fpu_name = arm_fpu_desc->name;
          if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
            {
              if (TARGET_HARD_FLOAT)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
              if (TARGET_HARD_FLOAT_ABI)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
            }
        }
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      /* Tag_ABI_FP_rounding.  */
      if (flag_rounding_math)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
        {
          /* Tag_ABI_FP_denormal.  */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
          /* Tag_ABI_FP_exceptions.  */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
        }
      /* Tag_ABI_FP_user_exceptions.  */
      if (flag_signaling_nans)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
                   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
      /* Tag_ABI_enum_size.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
                   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      /* Tag_ABI_FP_16bit_format.  */
      if (arm_fp16_format)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
                     (int)arm_fp16_format);
    }

  if (arm_lang_output_object_attributes_hook)
    arm_lang_output_object_attributes_hook();

  default_file_start();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
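/* The table emitted above looks roughly like this for each register
   that needed a call-via entry (label number hypothetical):

	.code 16
	.align 1
   .L42:
	bx	r4

   so a Thumb caller can reach the target in r4 with "bl .L42".  */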
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                     HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%s\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
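/* E.g. a V4HImode vector {1, 2, 3, 4} is printed high element first as
   0x0004000300020001.  */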
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
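/* E.g. on a little-endian target the HFmode constant 1.0 (bit pattern
   0x3c00) is emitted as the two bytes of 0x3c00 followed by two bytes
   of zero padding, so the pool entry still occupies a full word.  */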
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
                            enum machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
        nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
21664 /* Return nonzero if the CONSUMER instruction (a store) does not need
21665 PRODUCER's value to calculate the address. */
21668 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21670 rtx value = PATTERN (producer);
21671 rtx addr = PATTERN (consumer);
21673 if (GET_CODE (value) == COND_EXEC)
21674 value = COND_EXEC_CODE (value);
21675 if (GET_CODE (value) == PARALLEL)
21676 value = XVECEXP (value, 0, 0);
21677 value = XEXP (value, 0);
21678 if (GET_CODE (addr) == COND_EXEC)
21679 addr = COND_EXEC_CODE (addr);
21680 if (GET_CODE (addr) == PARALLEL)
21681 addr = XVECEXP (addr, 0, 0);
21682 addr = XEXP (addr, 0);
21684 return !reg_overlap_mentioned_p (value, addr);
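/* For example, with PRODUCER (set (reg r0) ...) and CONSUMER
   (set (mem (plus (reg r0) (const_int 4))) (reg r2)), the store's
   destination address mentions r0 and this returns zero; a store
   through an unrelated base register would return nonzero.  */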
21687 /* Return nonzero if the CONSUMER instruction (a store) does need
21688 PRODUCER's value to calculate the address. */
21691 arm_early_store_addr_dep (rtx producer, rtx consumer)
21693 return !arm_no_early_store_addr_dep (producer, consumer);
21696 /* Return nonzero if the CONSUMER instruction (a load) does need
21697 PRODUCER's value to calculate the address. */
21700 arm_early_load_addr_dep (rtx producer, rtx consumer)
21702 rtx value = PATTERN (producer);
21703 rtx addr = PATTERN (consumer);
21705 if (GET_CODE (value) == COND_EXEC)
21706 value = COND_EXEC_CODE (value);
21707 if (GET_CODE (value) == PARALLEL)
21708 value = XVECEXP (value, 0, 0);
21709 value = XEXP (value, 0);
21710 if (GET_CODE (addr) == COND_EXEC)
21711 addr = COND_EXEC_CODE (addr);
21712 if (GET_CODE (addr) == PARALLEL)
21713 addr = XVECEXP (addr, 0, 0);
21714 addr = XEXP (addr, 1);
21716 return reg_overlap_mentioned_p (value, addr);
21719 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21720 have an early register shift value or amount dependency on the
21721 result of PRODUCER. */
21724 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21726 rtx value = PATTERN (producer);
21727 rtx op = PATTERN (consumer);
21730 if (GET_CODE (value) == COND_EXEC)
21731 value = COND_EXEC_CODE (value);
21732 if (GET_CODE (value) == PARALLEL)
21733 value = XVECEXP (value, 0, 0);
21734 value = XEXP (value, 0);
21735 if (GET_CODE (op) == COND_EXEC)
21736 op = COND_EXEC_CODE (op);
21737 if (GET_CODE (op) == PARALLEL)
21738 op = XVECEXP (op, 0, 0);
21741 early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;
21748 return !reg_overlap_mentioned_p (value, early_op);
21751 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
21756 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21758 rtx value = PATTERN (producer);
21759 rtx op = PATTERN (consumer);
21762 if (GET_CODE (value) == COND_EXEC)
21763 value = COND_EXEC_CODE (value);
21764 if (GET_CODE (value) == PARALLEL)
21765 value = XVECEXP (value, 0, 0);
21766 value = XEXP (value, 0);
21767 if (GET_CODE (op) == COND_EXEC)
21768 op = COND_EXEC_CODE (op);
21769 if (GET_CODE (op) == PARALLEL)
21770 op = XVECEXP (op, 0, 0);
21773 early_op = XEXP (op, 0);
21775 /* This is either an actual independent shift, or a shift applied to
21776 the first operand of another operation. We want the value being
21777 shifted, in either case. */
21778 if (GET_CODE (early_op) != REG)
21779 early_op = XEXP (early_op, 0);
21781 return !reg_overlap_mentioned_p (value, early_op);
21784 /* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
21789 arm_no_early_mul_dep (rtx producer, rtx consumer)
21791 rtx value = PATTERN (producer);
21792 rtx op = PATTERN (consumer);
21794 if (GET_CODE (value) == COND_EXEC)
21795 value = COND_EXEC_CODE (value);
21796 if (GET_CODE (value) == PARALLEL)
21797 value = XVECEXP (value, 0, 0);
21798 value = XEXP (value, 0);
21799 if (GET_CODE (op) == COND_EXEC)
21800 op = COND_EXEC_CODE (op);
21801 if (GET_CODE (op) == PARALLEL)
21802 op = XVECEXP (op, 0, 0);
  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
21816 /* We can't rely on the caller doing the proper promotion when
21817 using APCS or ATPCS. */
21820 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21822 return !TARGET_AAPCS_BASED;
21825 static enum machine_mode
21826 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21827 enum machine_mode mode,
21828 int *punsignedp ATTRIBUTE_UNUSED,
21829 const_tree fntype ATTRIBUTE_UNUSED,
21830 int for_return ATTRIBUTE_UNUSED)
21832 if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
21839 /* AAPCS based ABIs use short enums by default. */
21842 arm_default_short_enums (void)
21844 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21848 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21851 arm_align_anon_bitfield (void)
21853 return TARGET_AAPCS_BASED;
21857 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21860 arm_cxx_guard_type (void)
21862 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21865 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21866 has an accumulator dependency on the result of the producer (a
21867 multiplication instruction) and no other dependency on that result. */
21869 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21871 rtx mul = PATTERN (producer);
21872 rtx mac = PATTERN (consumer);
21874 rtx mac_op0, mac_op1, mac_acc;
21876 if (GET_CODE (mul) == COND_EXEC)
21877 mul = COND_EXEC_CODE (mul);
21878 if (GET_CODE (mac) == COND_EXEC)
21879 mac = COND_EXEC_CODE (mac);
21881 /* Check that mul is of the form (set (...) (mult ...))
21882 and mla is of the form (set (...) (plus (mult ...) (...))). */
21883 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21884 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
      || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;
21888 mul_result = XEXP (mul, 0);
21889 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21890 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21891 mac_acc = XEXP (XEXP (mac, 1), 1);
21893 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21894 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21895 && !reg_overlap_mentioned_p (mul_result, mac_op1));
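/* A concrete sketch, with invented registers: for the pair
	mul	r1, r2, r3
	mla	r0, r4, r5, r1
   the multiply result r1 is used only as the accumulator, so this returns
   nonzero; "mla r0, r1, r5, r6" would return zero because r1 also feeds
   a multiply operand.  */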
21899 /* The EABI says test the least significant bit of a guard variable. */
21902 arm_cxx_guard_mask_bit (void)
21904 return TARGET_AAPCS_BASED;
21908 /* The EABI specifies that all array cookies are 8 bytes long. */
21911 arm_get_cookie_size (tree type)
21915 if (!TARGET_AAPCS_BASED)
21916 return default_cxx_get_cookie_size (type);
21918 size = build_int_cst (sizetype, 8);
21923 /* The EABI says that array cookies should also contain the element size. */
21926 arm_cookie_has_size (void)
21928 return TARGET_AAPCS_BASED;
21932 /* The EABI says constructors and destructors should return a pointer to
21933 the object constructed/destroyed. */
21936 arm_cxx_cdtor_returns_this (void)
21938 return TARGET_AAPCS_BASED;
/* The EABI says that an inline function may never be the key
   method.  */
21945 arm_cxx_key_method_may_be_inline (void)
21947 return !TARGET_AAPCS_BASED;
21951 arm_cxx_determine_class_data_visibility (tree decl)
21953 if (!TARGET_AAPCS_BASED
21954 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21957 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21958 is exported. However, on systems without dynamic vague linkage,
21959 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21960 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21961 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21963 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21964 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21968 arm_cxx_class_data_always_comdat (void)
21970 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21971 vague linkage if the class has no key function. */
21972 return !TARGET_AAPCS_BASED;
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */
21980 arm_cxx_use_aeabi_atexit (void)
21982 return TARGET_AAPCS_BASED;
21987 arm_set_return_address (rtx source, rtx scratch)
21989 arm_stack_offsets *offsets;
21990 HOST_WIDE_INT delta;
21992 unsigned long saved_regs;
21994 offsets = arm_get_frame_offsets ();
21995 saved_regs = offsets->saved_regs_mask;
21997 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21998 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22001 if (frame_pointer_needed)
22002 addr = plus_constant(hard_frame_pointer_rtx, -4);
22005 /* LR will be the first saved register. */
22006 delta = offsets->outgoing_args - (offsets->frame + 4);
22011 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22012 GEN_INT (delta & ~4095)));
22017 addr = stack_pointer_rtx;
22019 addr = plus_constant (addr, delta);
22021 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22027 thumb_set_return_address (rtx source, rtx scratch)
22029 arm_stack_offsets *offsets;
22030 HOST_WIDE_INT delta;
22031 HOST_WIDE_INT limit;
22034 unsigned long mask;
22038 offsets = arm_get_frame_offsets ();
22039 mask = offsets->saved_regs_mask;
22040 if (mask & (1 << LR_REGNUM))
22043 /* Find the saved regs. */
22044 if (frame_pointer_needed)
22046 delta = offsets->soft_frame - offsets->saved_args;
22047 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22053 delta = offsets->outgoing_args - offsets->saved_args;
22056 /* Allow for the stack frame. */
22057 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22059 /* The link register is always the first saved register. */
22062 /* Construct the address. */
22063 addr = gen_rtx_REG (SImode, reg);
22066 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22067 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22071 addr = plus_constant (addr, delta);
22073 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22076 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22079 /* Implements target hook vector_mode_supported_p. */
22081 arm_vector_mode_supported_p (enum machine_mode mode)
22083 /* Neon also supports V2SImode, etc. listed in the clause below. */
22084 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22085 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22088 if ((TARGET_NEON || TARGET_IWMMXT)
22089 && ((mode == V2SImode)
22090 || (mode == V4HImode)
22091 || (mode == V8QImode)))
22097 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22098 registers when autovectorizing for Neon, at least until multiple vector
22099 widths are supported properly by the middle-end. */
22101 static enum machine_mode
22102 arm_preferred_simd_mode (enum machine_mode mode)
22108 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22110 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22112 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22114 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22116 if (TARGET_NEON_VECTORIZE_QUAD)
22123 if (TARGET_REALLY_IWMMXT)
22139 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22141 We need to define this for LO_REGS on thumb. Otherwise we can end up
22142 using r0-r4 for function arguments, r7 for the stack frame and don't
22143 have enough left over to do doubleword arithmetic. */
22146 arm_class_likely_spilled_p (reg_class_t rclass)
22148 if ((TARGET_THUMB && rclass == LO_REGS)
22149 || rclass == CC_REG)
22155 /* Implements target hook small_register_classes_for_mode_p. */
22157 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22159 return TARGET_THUMB1;
22162 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22163 ARM insns and therefore guarantee that the shift count is modulo 256.
22164 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22165 guarantee no particular behavior for out-of-range counts. */
22167 static unsigned HOST_WIDE_INT
22168 arm_shift_truncation_mask (enum machine_mode mode)
22170 return mode == SImode ? 255 : 0;
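/* For instance, the register-controlled shift "lsl r0, r1, r2" only uses
   the least significant byte of r2, so a shift count of 256 leaves the
   value unchanged; returning 255 lets the middle end treat
   "x << (n & 255)" and "x << n" as equivalent for SImode.  */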
22174 /* Map internal gcc register numbers to DWARF2 register numbers. */
22177 arm_dbx_register_number (unsigned int regno)
22182 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22183 compatibility. The EABI defines them as registers 96-103. */
22184 if (IS_FPA_REGNUM (regno))
22185 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22187 if (IS_VFP_REGNUM (regno))
22189 /* See comment in arm_dwarf_register_span. */
22190 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22191 return 64 + regno - FIRST_VFP_REGNUM;
22193 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22196 if (IS_IWMMXT_GR_REGNUM (regno))
22197 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22199 if (IS_IWMMXT_REGNUM (regno))
22200 return 112 + regno - FIRST_IWMMXT_REGNUM;
22202 gcc_unreachable ();
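/* Sketching the mapping: s5 becomes DWARF register 69 (64 + 5); d20,
   which has no single-precision alias, becomes 276 (256 + 20); and
   iWMMXt wR2 becomes 114 (112 + 2).  */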
22205 /* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
22207 the DWARF generation code. Other registers can use the default. */
22209 arm_dwarf_register_span (rtx rtl)
22216 regno = REGNO (rtl);
22217 if (!IS_VFP_REGNUM (regno))
22220 /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
22223 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22224 corresponding D register. Until GDB supports this, we shall use the
22225 legacy encodings. We also use these encodings for D0-D15 for
22226 compatibility with older debuggers. */
22227 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22230 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22231 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22232 regno = (regno - FIRST_VFP_REGNUM) / 2;
22233 for (i = 0; i < nregs; i++)
22234 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22239 #if ARM_UNWIND_INFO
22240 /* Emit unwind directives for a store-multiple instruction or stack pointer
22241 push during alignment.
22242 These should only ever be generated by the function prologue code, so
22243 expect them to have a particular form. */
22246 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22249 HOST_WIDE_INT offset;
22250 HOST_WIDE_INT nregs;
22256 e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();
22260 /* First insn will adjust the stack pointer. */
22261 if (GET_CODE (e) != SET
22262 || GET_CODE (XEXP (e, 0)) != REG
22263 || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();
22267 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22268 nregs = XVECLEN (p, 0) - 1;
22270 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22273 /* The function prologue may also push pc, but not annotate it as it is
22274 never restored. We turn this into a stack pointer adjustment. */
22275 if (nregs * 4 == offset - 4)
22277 fprintf (asm_out_file, "\t.pad #4\n");
22281 fprintf (asm_out_file, "\t.save {");
22283 else if (IS_VFP_REGNUM (reg))
22286 fprintf (asm_out_file, "\t.vsave {");
22288 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22290 /* FPA registers are done differently. */
22291 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22295 /* Unknown register type. */
22298 /* If the stack increment doesn't match the size of the saved registers,
22299 something has gone horribly wrong. */
  if (offset != nregs * reg_size)
    abort ();
22305 /* The remaining insns will describe the stores. */
22306 for (i = 1; i <= nregs; i++)
22308 /* Expect (set (mem <addr>) (reg)).
22309 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22310 e = XVECEXP (p, 0, i);
22311 if (GET_CODE (e) != SET
22312 || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();
22316 reg = REGNO (XEXP (e, 1));
22321 fprintf (asm_out_file, ", ");
22322 /* We can't use %r for vfp because we need to use the
22323 double precision register names. */
22324 if (IS_VFP_REGNUM (reg))
22325 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22327 asm_fprintf (asm_out_file, "%r", reg);
22329 #ifdef ENABLE_CHECKING
22330 /* Check that the addresses are consecutive. */
22331 e = XEXP (XEXP (e, 0), 0);
22332 if (GET_CODE (e) == PLUS)
22334 offset += reg_size;
22335 if (GET_CODE (XEXP (e, 0)) != REG
22336 || REGNO (XEXP (e, 0)) != SP_REGNUM
22337 || GET_CODE (XEXP (e, 1)) != CONST_INT
22338 || offset != INTVAL (XEXP (e, 1)))
22342 || GET_CODE (e) != REG
22343 || REGNO (e) != SP_REGNUM)
22347 fprintf (asm_out_file, "}\n");
22350 /* Emit unwind directives for a SET. */
22353 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22361 switch (GET_CODE (e0))
22364 /* Pushing a single register. */
22365 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22366 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();
22370 asm_fprintf (asm_out_file, "\t.save ");
22371 if (IS_VFP_REGNUM (REGNO (e1)))
22372 asm_fprintf(asm_out_file, "{d%d}\n",
22373 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22375 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22379 if (REGNO (e0) == SP_REGNUM)
22381 /* A stack increment. */
22382 if (GET_CODE (e1) != PLUS
22383 || GET_CODE (XEXP (e1, 0)) != REG
22384 || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();
22388 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22389 -INTVAL (XEXP (e1, 1)));
22391 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22393 HOST_WIDE_INT offset;
22395 if (GET_CODE (e1) == PLUS)
22397 if (GET_CODE (XEXP (e1, 0)) != REG
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();
22400 reg = REGNO (XEXP (e1, 0));
22401 offset = INTVAL (XEXP (e1, 1));
22402 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22403 HARD_FRAME_POINTER_REGNUM, reg,
22406 else if (GET_CODE (e1) == REG)
22409 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22410 HARD_FRAME_POINTER_REGNUM, reg);
22415 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22417 /* Move from sp to reg. */
22418 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22420 else if (GET_CODE (e1) == PLUS
22421 && GET_CODE (XEXP (e1, 0)) == REG
22422 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22423 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22425 /* Set reg to offset from sp. */
22426 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22427 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22429 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22431 /* Stack pointer save before alignment. */
22433 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22446 /* Emit unwind directives for the given insn. */
22449 arm_unwind_emit (FILE * asm_out_file, rtx insn)
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;
22456 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22457 && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;
  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;
22464 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);
22470 switch (GET_CODE (pat))
22473 arm_unwind_emit_set (asm_out_file, pat);
22477 /* Store multiple. */
22478 arm_unwind_emit_sequence (asm_out_file, pat);
22487 /* Output a reference from a function exception table to the type_info
22488 object X. The EABI specifies that the symbol should be relocated by
22489 an R_ARM_TARGET2 relocation. */
22492 arm_output_ttype (rtx x)
22494 fputs ("\t.word\t", asm_out_file);
22495 output_addr_const (asm_out_file, x);
22496 /* Use special relocations for symbol references. */
22497 if (GET_CODE (x) != CONST_INT)
22498 fputs ("(TARGET2)", asm_out_file);
22499 fputc ('\n', asm_out_file);
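/* For example, a catch clause for a class A emits
	.word	_ZTI1A(TARGET2)
   whereas a const_int entry is emitted as a plain ".word 0".  */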
22504 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22507 arm_asm_emit_except_personality (rtx personality)
22509 fputs ("\t.personality\t", asm_out_file);
22510 output_addr_const (asm_out_file, personality);
22511 fputc ('\n', asm_out_file);
22514 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22517 arm_asm_init_sections (void)
22519 exception_section = get_unnamed_section (0, output_section_asm_op,
22522 #endif /* ARM_UNWIND_INFO */
22524 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22526 static enum unwind_info_type
22527 arm_except_unwind_info (struct gcc_options *opts)
22529 /* Honor the --enable-sjlj-exceptions configure switch. */
22530 #ifdef CONFIG_SJLJ_EXCEPTIONS
22531 if (CONFIG_SJLJ_EXCEPTIONS)
22535 /* If not using ARM EABI unwind tables... */
22536 if (ARM_UNWIND_INFO)
22538 /* For simplicity elsewhere in this file, indicate that all unwind
22539 info is disabled if we're not emitting unwind tables. */
      if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
	return UI_NONE;
      else
	return UI_TARGET;
    }

  /* ... we use sjlj exceptions for backwards compatibility.  */
  return UI_SJLJ;
22551 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22552 stack alignment. */
22555 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22557 rtx unspec = SET_SRC (pattern);
22558 gcc_assert (GET_CODE (unspec) == UNSPEC);
22562 case UNSPEC_STACK_ALIGN:
22563 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22564 put anything on the stack, so hopefully it won't matter.
22565 CFA = SP will be correct after alignment. */
22566 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22567 SET_DEST (pattern));
22570 gcc_unreachable ();
22575 /* Output unwind directives for the start/end of a function. */
22578 arm_output_fn_unwind (FILE * f, bool prologue)
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;
22584 fputs ("\t.fnstart\n", f);
22587 /* If this function will never be unwound, then mark it as such.
     The same condition is used in arm_unwind_emit to suppress
22589 the frame annotations. */
22590 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22591 && (TREE_NOTHROW (current_function_decl)
22592 || crtl->all_throwers_are_sibcalls))
22593 fputs("\t.cantunwind\n", f);
22595 fputs ("\t.fnend\n", f);
22600 arm_emit_tls_decoration (FILE *fp, rtx x)
22602 enum tls_reloc reloc;
22605 val = XVECEXP (x, 0, 0);
22606 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22608 output_addr_const (fp, val);
22613 fputs ("(tlsgd)", fp);
22616 fputs ("(tlsldm)", fp);
22619 fputs ("(tlsldo)", fp);
22622 fputs ("(gottpoff)", fp);
22625 fputs ("(tpoff)", fp);
22628 gcc_unreachable ();
22636 fputs (" + (. - ", fp);
22637 output_addr_const (fp, XVECEXP (x, 0, 2));
22639 output_addr_const (fp, XVECEXP (x, 0, 3));
22649 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22652 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22654 gcc_assert (size == 4);
22655 fputs ("\t.word\t", file);
22656 output_addr_const (file, x);
22657 fputs ("(tlsldo)", file);
22660 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22663 arm_output_addr_const_extra (FILE *fp, rtx x)
22665 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22666 return arm_emit_tls_decoration (fp, x);
22667 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22670 int labelno = INTVAL (XVECEXP (x, 0, 0));
22672 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22673 assemble_name_raw (fp, label);
22677 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22679 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22683 output_addr_const (fp, XVECEXP (x, 0, 0));
22687 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22689 output_addr_const (fp, XVECEXP (x, 0, 0));
22693 output_addr_const (fp, XVECEXP (x, 0, 1));
22697 else if (GET_CODE (x) == CONST_VECTOR)
22698 return arm_emit_vector_const (fp, x);
22703 /* Output assembly for a shift instruction.
22704 SET_FLAGS determines how the instruction modifies the condition codes.
22705 0 - Do not set condition codes.
22706 1 - Set condition codes.
22707 2 - Use smallest instruction. */
22709 arm_output_shift(rtx * operands, int set_flags)
22712 static const char flag_chars[3] = {'?', '.', '!'};
22717 c = flag_chars[set_flags];
22718 if (TARGET_UNIFIED_ASM)
22720 shift = shift_op(operands[3], &val);
22724 operands[2] = GEN_INT(val);
22725 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22728 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22731 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22732 output_asm_insn (pattern, operands);
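/* As a sketch: with unified assembly and set_flags == 1 (c == '.'), a
   constant left shift is printed via "lsl%.\t%0, %1, %2" and assembles
   as, e.g., "lsls r0, r1, #2"; without unified assembly everything is
   funnelled through the "mov%c\t%0, %1%S3" form, with %S3 printing the
   shift as an operand modifier.  */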
22736 /* Output a Thumb-1 casesi dispatch sequence. */
22738 thumb1_output_casesi (rtx *operands)
22740 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22742 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22744 switch (GET_MODE(diff_vec))
22747 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22748 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22750 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22751 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22753 return "bl\t%___gnu_thumb1_case_si";
22755 gcc_unreachable ();
22759 /* Output a Thumb-2 casesi instruction. */
22761 thumb2_output_casesi (rtx *operands)
22763 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22765 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22767 output_asm_insn ("cmp\t%0, %1", operands);
22768 output_asm_insn ("bhi\t%l3", operands);
22769 switch (GET_MODE(diff_vec))
22772 return "tbb\t[%|pc, %0]";
22774 return "tbh\t[%|pc, %0, lsl #1]";
22778 output_asm_insn ("adr\t%4, %l2", operands);
22779 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22780 output_asm_insn ("add\t%4, %4, %5", operands);
22785 output_asm_insn ("adr\t%4, %l2", operands);
22786 return "ldr\t%|pc, [%4, %0, lsl #2]";
22789 gcc_unreachable ();
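/* Typical output for a QImode (byte offset) table, with illustrative
   registers:
	cmp	r0, r1
	bhi	.L3
	tbb	[pc, r0]
   HImode tables use tbh, and SImode tables fall back to the adr/ldr
   sequences built above.  */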
22793 /* Most ARM cores are single issue, but some newer ones can dual issue.
22794 The scheduler descriptions rely on this being correct. */
22796 arm_issue_rate (void)
22812 /* A table and a function to perform ARM-specific name mangling for
22813 NEON vector types in order to conform to the AAPCS (see "Procedure
22814 Call Standard for the ARM Architecture", Appendix A). To qualify
22815 for emission with the mangled names defined in that document, a
22816 vector type must not only be of the correct mode but also be
22817 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22820 enum machine_mode mode;
22821 const char *element_type_name;
22822 const char *aapcs_name;
22823 } arm_mangle_map_entry;
22825 static arm_mangle_map_entry arm_mangle_map[] = {
22826 /* 64-bit containerized types. */
22827 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22828 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22829 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22830 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22831 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22832 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22833 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22834 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22835 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22836 /* 128-bit containerized types. */
22837 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22838 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22839 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22840 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22841 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22842 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22843 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22844 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22845 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22846 { VOIDmode, NULL, NULL }
22850 arm_mangle_type (const_tree type)
22852 arm_mangle_map_entry *pos = arm_mangle_map;
22854 /* The ARM ABI documents (10th October 2008) say that "__va_list"
   has to be mangled as if it is in the "std" namespace.  */
22856 if (TARGET_AAPCS_BASED
22857 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22859 static bool warned;
22860 if (!warned && warn_psabi && !in_system_header)
22863 inform (input_location,
22864 "the mangling of %<va_list%> has changed in GCC 4.4");
22866 return "St9__va_list";
22869 /* Half-precision float. */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;
22876 /* Check the mode of the vector type, and the name of the vector
22877 element type, against the table. */
22878 while (pos->mode != VOIDmode)
22880 tree elt_type = TREE_TYPE (type);
22882 if (pos->mode == TYPE_MODE (type)
22883 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22884 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22885 pos->element_type_name))
22886 return pos->aapcs_name;
  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
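/* For instance, the NEON type int32x4_t has mode V4SImode and element
   type __builtin_neon_si, so the table yields "17__simd128_int32_t" and
   "void f (int32x4_t)" mangles as "_Z1f17__simd128_int32_t".  */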
22896 /* Order of allocation of core registers for Thumb: this allocation is
22897 written over the corresponding initial entries of the array
22898 initialized with REG_ALLOC_ORDER. We allocate all low registers
22899 first. Saving and restoring a low register is usually cheaper than
22900 using a call-clobbered high register. */
22902 static const int thumb_core_reg_alloc_order[] =
22904 3, 2, 1, 0, 4, 5, 6, 7,
22905 14, 12, 8, 9, 10, 11, 13, 15
22908 /* Adjust register allocation order when compiling for Thumb. */
22911 arm_order_regs_for_local_alloc (void)
22913 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22914 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
22920 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22923 arm_frame_pointer_required (void)
22925 return (cfun->has_nonlocal_label
22926 || SUBTARGET_FRAME_POINTER_REQUIRED
22927 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22930 /* Only thumb1 can't support conditional execution, so return true if
22931 the target is not thumb1. */
22933 arm_have_conditional_execution (void)
22935 return !TARGET_THUMB1;
22938 /* Legitimize a memory reference for sync primitive implemented using
22939 ldrex / strex. We currently force the form of the reference to be
22940 indirect without offset. We do not yet support the indirect offset
   addressing supported by some ARM targets for these instructions.  */
22944 arm_legitimize_sync_memory (rtx memory)
22946 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22947 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22949 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22950 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22951 return legitimate_memory;
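/* For example, a reference such as (mem (plus (reg sp) (const_int 8)))
   is rewritten so the full address is first forced into a fresh pseudo
   and the access becomes a plain register-indirect mem, which is the
   only form ldrex/strex are given here.  */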
22954 /* An instruction emitter. */
22955 typedef void (* emit_f) (int label, const char *, rtx *);
22957 /* An instruction emitter that emits via the conventional
22958 output_asm_insn. */
22960 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
22962 output_asm_insn (pattern, operands);
22965 /* Count the number of emitted synchronization instructions. */
22966 static unsigned arm_insn_count;
22968 /* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
22971 arm_count (int label,
22972 const char *pattern ATTRIBUTE_UNUSED,
22973 rtx *operands ATTRIBUTE_UNUSED)
22979 /* Construct a pattern using conventional output formatting and feed
22980 it to output_asm_insn. Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
22983 static void ATTRIBUTE_PRINTF_4
22984 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
22985 const char *pattern, ...)
22990 va_start (ap, pattern);
22991 vsprintf (buffer, pattern, ap);
22993 emit (label, buffer, operands);
22996 /* Emit the memory barrier instruction, if any, provided by this
22997 target to a specified emitter. */
22999 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23001 if (TARGET_HAVE_DMB)
23003 /* Note we issue a system level barrier. We should consider
	 issuing an inner shareability zone barrier here instead, i.e.
	 "DMB ISH".  */
23006 emit (0, "dmb\tsy", operands);
23010 if (TARGET_HAVE_DMB_MCR)
23012 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23016 gcc_unreachable ();
/* Emit the memory barrier instruction, if any, provided by this
   target.  */
23022 arm_output_memory_barrier (rtx *operands)
23024 arm_process_output_memory_barrier (arm_emit, operands);
23028 /* Helper to figure out the instruction suffix required on ldrex/strex
23029 for operations on an object of the specified mode. */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default: gcc_unreachable ();
    }
}
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   memory MODE.  */
23048 arm_output_ldrex (emit_f emit,
23049 enum machine_mode mode,
23053 const char *suffix = arm_ldrex_suffix (mode);
23056 operands[0] = target;
23057 operands[1] = memory;
23058 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
/* Emit a strex{b,h,d, } instruction appropriate for the specified
   memory MODE.  */
23064 arm_output_strex (emit_f emit,
23065 enum machine_mode mode,
23071 const char *suffix = arm_ldrex_suffix (mode);
23074 operands[0] = result;
23075 operands[1] = value;
23076 operands[2] = memory;
23077 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23081 /* Helper to emit a two operand instruction. */
23083 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23089 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23092 /* Helper to emit a three operand instruction. */
23094 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23101 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   REQUIRED_VALUE is an RTX register or const_int giving the old value
   needed for the modify to continue; if NULL no comparison is performed.  */
23123 arm_output_sync_loop (emit_f emit,
23124 enum machine_mode mode,
23127 rtx required_value,
23131 enum attr_sync_op sync_op,
23132 int early_barrier_required)
23136 gcc_assert (t1 != t2);
23138 if (early_barrier_required)
23139 arm_process_output_memory_barrier (emit, NULL);
23141 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23143 arm_output_ldrex (emit, mode, old_value, memory);
23145 if (required_value)
23149 operands[0] = old_value;
23150 operands[1] = required_value;
23151 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23152 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23158 arm_output_op3 (emit, "add", t1, old_value, new_value);
23162 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23166 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23170 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23174 arm_output_op3 (emit,"and", t1, old_value, new_value);
23178 arm_output_op3 (emit, "and", t1, old_value, new_value);
23179 arm_output_op2 (emit, "mvn", t1, t1);
23187 arm_output_strex (emit, mode, "", t2, t1, memory);
23189 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23190 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
23192 arm_process_output_memory_barrier (emit, NULL);
23193 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
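/* As an illustrative sketch, an SImode fetch-and-add loop comes out
   roughly as (register choices invented for the example):
	dmb	sy
   LSYT:	ldrex	r0, [r1]
	add	r4, r0, r2
	strex	r5, r4, [r1]
	teq	r5, #0
	bne	LSYT
	dmb	sy
   LSYB:
   with the cmp/bne against REQUIRED_VALUE inserted after the ldrex for
   compare-and-swap style operations.  */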
23197 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23200 default_value = operands[index - 1];
23202 return default_value;
23205 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23206 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
23211 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
23215 enum machine_mode mode;
23216 enum attr_sync_op sync_op;
23218 result = FETCH_SYNC_OPERAND(result, 0);
23219 memory = FETCH_SYNC_OPERAND(memory, 0);
23220 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23221 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23222 t1 = FETCH_SYNC_OPERAND(t1, 0);
23223 t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23226 sync_op = get_attr_sync_op (insn);
23227 mode = GET_MODE (memory);
23229 arm_output_sync_loop (emit, mode, result, memory, required_value,
23230 new_value, t1, t2, sync_op, early_barrier);
23233 /* Emit a synchronization instruction loop. */
23235 arm_output_sync_insn (rtx insn, rtx *operands)
23237 arm_process_output_sync_insn (arm_emit, insn, operands);
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions, it just counts them, being careful not to count
   labels.  */
23246 arm_sync_loop_insns (rtx insn, rtx *operands)
23248 arm_insn_count = 0;
23249 arm_process_output_sync_insn (arm_count, insn, operands);
23250 return arm_insn_count;
23253 /* Helper to call a target sync instruction generator, dealing with
23254 the variation in operands required by the different generators. */
23256 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23257 rtx memory, rtx required_value, rtx new_value)
23259 switch (generator->op)
23261 case arm_sync_generator_omn:
23262 gcc_assert (! required_value);
23263 return generator->u.omn (old_value, memory, new_value);
23265 case arm_sync_generator_omrn:
23266 gcc_assert (required_value);
23267 return generator->u.omrn (old_value, memory, required_value, new_value);
23273 /* Expand a synchronization loop. The synchronization loop is expanded
23274 as an opaque block of instructions in order to ensure that we do
23275 not subsequently get extraneous memory accesses inserted within the
23276 critical region. The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
23279 arm_expand_sync (enum machine_mode mode,
23280 struct arm_sync_generator *generator,
23281 rtx target, rtx memory, rtx required_value, rtx new_value)
23283 if (target == NULL)
23284 target = gen_reg_rtx (mode);
23286 memory = arm_legitimize_sync_memory (memory);
23287 if (mode != SImode)
23289 rtx load_temp = gen_reg_rtx (SImode);
23291 if (required_value)
23292 required_value = convert_modes (SImode, mode, required_value, true);
23294 new_value = convert_modes (SImode, mode, new_value, true);
23295 emit_insn (arm_call_generator (generator, load_temp, memory,
23296 required_value, new_value));
23297 emit_move_insn (target, gen_lowpart (mode, load_temp));
23301 emit_insn (arm_call_generator (generator, target, memory, required_value,
23307 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23309 /* Vectors which aren't in packed structures will not be less aligned than
23310 the natural alignment of their element type, so this is safe. */
23311 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23314 return default_builtin_vector_alignment_reachable (type, is_packed);
23318 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23319 const_tree type, int misalignment,
23322 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23324 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23329 /* If the misalignment is unknown, we should be able to handle the access
23330 so long as it is not to a member of a packed data structure. */
23331 if (misalignment == -1)
23334 /* Return true if the misalignment is a multiple of the natural alignment
23335 of the vector's element type. This is probably always going to be
     true in practice, since we've already established that this isn't a
     packed access.  */
23338 return ((misalignment % align) == 0);
23341 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23346 arm_conditional_register_usage (void)
23350 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23352 for (regno = FIRST_FPA_REGNUM;
23353 regno <= LAST_FPA_REGNUM; ++regno)
23354 fixed_regs[regno] = call_used_regs[regno] = 1;
23357 if (TARGET_THUMB1 && optimize_size)
23359 /* When optimizing for size on Thumb-1, it's better not
     to use the HI regs, because of the overhead of stacking them.  */
23362 for (regno = FIRST_HI_REGNUM;
23363 regno <= LAST_HI_REGNUM; ++regno)
23364 fixed_regs[regno] = call_used_regs[regno] = 1;
23367 /* The link register can be clobbered by any branch insn,
23368 but we have no way to track that at present, so mark
23369 it as unavailable. */
23371 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23373 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23375 if (TARGET_MAVERICK)
23377 for (regno = FIRST_FPA_REGNUM;
23378 regno <= LAST_FPA_REGNUM; ++ regno)
23379 fixed_regs[regno] = call_used_regs[regno] = 1;
23380 for (regno = FIRST_CIRRUS_FP_REGNUM;
23381 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23383 fixed_regs[regno] = 0;
23384 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23389 /* VFPv3 registers are disabled when earlier VFP
23390 versions are selected due to the definition of
23391 LAST_VFP_REGNUM. */
23392 for (regno = FIRST_VFP_REGNUM;
23393 regno <= LAST_VFP_REGNUM; ++ regno)
23395 fixed_regs[regno] = 0;
23396 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23397 || regno >= FIRST_VFP_REGNUM + 32;
23402 if (TARGET_REALLY_IWMMXT)
23404 regno = FIRST_IWMMXT_GR_REGNUM;
23405 /* The 2002/10/09 revision of the XScale ABI has wCG0
23406 and wCG1 as call-preserved registers. The 2002/11/21
23407 revision changed this so that all wCG registers are
23408 scratch registers. */
23409 for (regno = FIRST_IWMMXT_GR_REGNUM;
23410 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23411 fixed_regs[regno] = 0;
23412 /* The XScale ABI has wR0 - wR9 as scratch registers,
23413 the rest as call-preserved registers. */
23414 for (regno = FIRST_IWMMXT_REGNUM;
23415 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23417 fixed_regs[regno] = 0;
23418 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23424 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23425 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23427 else if (TARGET_APCS_STACK)
23429 fixed_regs[10] = 1;
23430 call_used_regs[10] = 1;
23432 /* -mcaller-super-interworking reserves r11 for calls to
23433 _interwork_r11_call_via_rN(). Making the register global
     is an easy way of ensuring that it remains valid for all calls.  */
23436 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23437 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23439 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23440 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23441 if (TARGET_CALLER_INTERWORKING)
23442 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23447 #include "gt-arm.h"