/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif
static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
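
/* For reference, the attributes above are applied in user code along
   the lines of:

     void handler (void) __attribute__ ((interrupt ("IRQ")));
     extern int helper (int) __attribute__ ((long_call));

   The handler field (e.g. arm_handle_isr_attribute) is what validates
   any arguments the attribute takes.  */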
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
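/* (That is, an anchor covers offsets -4088 through 4095 inclusive,
   a block of 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.)  */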

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
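
/* Tracing one example through the chain above: FL_FOR_ARCH7A starts
   from the v6T2 set (which includes Thumb, Thumb-2, the v5E DSP and
   v6 media instructions), clears and then restores FL_NOTM, and adds
   FL_ARCH7 together with the v6K extensions.  */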

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2 = 0;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv = 0;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to zero at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  ARM_PREFETCH_NOT_BENEFICIAL
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */
#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0 , NULL}
};

/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",           ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",           ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",            ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
  {"neon-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
  {"vfpv4",          ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};

struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;
  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }
  return count;
}
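
/* Clearing the least-significant set bit each time round means the
   loop above runs once per set bit; e.g. for VALUE == 0x29 (binary
   101001) it iterates exactly three times.  */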

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     AAPCS-based ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
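
  /* For example, an SImode "x % y" therefore becomes a call to
     __aeabi_idivmod, which returns the quotient in r0 and the
     remainder in r1; only the r1 half of the result is used.  */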

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
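      /* (Those widths are scaled byte offsets: the LDR immediate field
         is 5 bits, multiplied by the access size, so the maximum byte
         offsets are 31, 62 and 124 respectively.)  */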
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
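      /* (248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.)  */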
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else
        if (TARGET_CALLEE_INTERWORKING)
          error ("AAPCS does not support -mcallee-super-interworking");
    }
1699 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1700 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1701 will ever exist. GCC makes no attempt to support this combination. */
1702 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1703 sorry ("iWMMXt and hardware floating point");
1705 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1706 if (TARGET_THUMB2 && TARGET_IWMMXT)
1707 sorry ("Thumb-2 iWMMXt");
1709 /* __fp16 support currently assumes the core has ldrh. */
1710 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1711 sorry ("__fp16 and no ldrh");
1713 /* If soft-float is specified then don't use FPU. */
1714 if (TARGET_SOFT_FLOAT)
1715 arm_fpu_attr = FPU_NONE;
1717 if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
1720 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1721 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
1730 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1731 sorry ("-mfloat-abi=hard and VFP");
1733 if (arm_abi == ARM_ABI_APCS)
1734 arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
1739 /* For arm2/3 there is no need to do any scheduling if there is only
1740 a floating point emulator, or we are doing software floating-point. */
1741 if ((TARGET_SOFT_FLOAT
1742 || (TARGET_FPA && arm_fpu_desc->rev))
1743 && (tune_flags & FL_MODE32) == 0)
1744 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1746 if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
1749 target_thread_pointer = TP_SOFT;
1750 else if (strcmp (target_thread_switch, "auto") == 0)
1751 target_thread_pointer = TP_AUTO;
1752 else if (strcmp (target_thread_switch, "cp15") == 0)
1753 target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }
1758 /* Use the cp15 method if it is available. */
1759 if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
1762 target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
1767 if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("cannot use -mtp=cp15 with 16-bit Thumb");
1770 /* Override the default structure alignment for AAPCS ABI. */
1771 if (TARGET_AAPCS_BASED)
1772 arm_structure_size_boundary = 8;
1774 if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

1778 if (size == 8 || size == 32
1779 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1780 arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
    }
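  /* As an illustration (sizes assumed for this sketch, not taken from
     this file): with -mstructure-size-boundary=8 a struct containing a
     single char occupies one byte, whereas the APCS default of 32 pads
     it to four bytes so every structure ends on a word boundary.  */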
1786 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
1792 /* If stack checking is disabled, we can use r10 as the PIC register,
1793 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1794 if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
1797 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }
1801 if (flag_pic && TARGET_VXWORKS_RTP)
1802 arm_pic_register = 9;
1804 if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");
1811 /* Prevent the user from choosing an obviously stupid PIC register. */
1812 else if (pic_register < 0 || call_used_regs[pic_register]
1813 || pic_register == HARD_FRAME_POINTER_REGNUM
1814 || pic_register == STACK_POINTER_REGNUM
1815 || pic_register >= PC_REGNUM
1816 || (TARGET_VXWORKS_RTP
1817 && (unsigned int) pic_register != arm_pic_register))
1818 error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
1823 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1824 if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
1832 if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* StrongARM has early execution of branches, so a sequence
	 that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
	max_insns_skipped = 3;
    }
1852 /* Hot/Cold partitioning is not currently supported, since we can't
1853 handle literal pool placement in that case. */
1854 if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
1857 "-freorder-blocks-and-partition not supported on this architecture");
1858 flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
1863 /* Hoisting PIC address calculations more aggressively provides a small,
1864 but measurable, size reduction for PIC code. Therefore, we decrease
1865 the bar for unrestricted expression hoisting to the cost of PIC address
1866 calculation, which is 2 instructions. */
1867 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1868 global_options.x_param_values,
1869 global_options_set.x_param_values);
1871 /* ARM EABI defaults to strict volatile bitfields. */
1872 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1873 flag_strict_volatile_bitfields = 1;
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and
     for which we have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
1881 flag_prefetch_loop_arrays = 1;
1883 /* Set up parameters to be used in prefetching algorithm. Do not override the
1884 defaults unless we are tuning for a core we have researched values for. */
1885 if (current_tune->num_prefetch_slots > 0)
1886 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1887 current_tune->num_prefetch_slots,
1888 global_options.x_param_values,
1889 global_options_set.x_param_values);
1890 if (current_tune->l1_cache_line_size >= 0)
1891 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1892 current_tune->l1_cache_line_size,
1893 global_options.x_param_values,
1894 global_options_set.x_param_values);
1895 if (current_tune->l1_cache_size >= 0)
1896 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1897 current_tune->l1_cache_size,
1898 global_options.x_param_values,
1899 global_options_set.x_param_values);
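  /* For example (hypothetical tuning values, purely illustrative): a
     core described with num_prefetch_slots = 2, l1_cache_line_size = 64
     and l1_cache_size = 16 would set the simultaneous-prefetches,
     l1-cache-line-size and l1-cache-size params to those values, except
     where the user has already set a param explicitly, since
     maybe_set_param_value leaves explicitly-set values alone.  */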
1901 /* Register global variables with the garbage collector. */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
1912 /* A table of known ARM exception types.
1913 For use with the interrupt function attribute. */
typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
1924 { "IRQ", ARM_FT_ISR },
1925 { "irq", ARM_FT_ISR },
1926 { "FIQ", ARM_FT_FIQ },
1927 { "fiq", ARM_FT_FIQ },
1928 { "ABORT", ARM_FT_ISR },
1929 { "abort", ARM_FT_ISR },
1932 { "UNDEF", ARM_FT_EXCEPTION },
1933 { "undef", ARM_FT_EXCEPTION },
1934 { "SWI", ARM_FT_EXCEPTION },
1935 { "swi", ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
1939 /* Returns the (interrupt) function type of the current
1940 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1942 static unsigned long
1943 arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
1949 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1951 /* No argument - default to IRQ. */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;
1955 /* Get the value of the argument. */
1956 if (TREE_VALUE (argument) == NULL_TREE
1957 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1958 return ARM_FT_UNKNOWN;
1960 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1962 /* Check it against the list of known arguments. */
1963 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1964 if (streq (arg, ptr->arg))
1965 return ptr->return_value;
1967 /* An unrecognized interrupt type. */
  return ARM_FT_UNKNOWN;
}
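/* As a usage sketch (an assumed example, not code from this file), the
   table above corresponds to source-level attributes such as:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   for which arm_isr_value returns ARM_FT_ISR.  */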
1971 /* Computes the type of the current function. */
1973 static unsigned long
1974 arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

1980 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1982 /* Decide if the current function is volatile. Such functions
1983 never return, and many memory cycles can be saved by not storing
1984 register values that will never be needed again. This optimization
1985 was added to speed up context switching in a kernel application. */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
1988 || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1991 && TREE_THIS_VOLATILE (current_function_decl))
1992 type |= ARM_FT_VOLATILE;
1994 if (cfun->static_chain_decl != NULL)
1995 type |= ARM_FT_NESTED;
1997 attr = DECL_ATTRIBUTES (current_function_decl);
1999 a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;
2003 a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
2015 /* Returns the type of the current function. */
unsigned long
arm_current_func_type (void)
{
2020 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2021 cfun->machine->func_type = arm_compute_func_type ();
  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
2029 /* Naked functions should not allocate stack slots for arguments. */
  return !IS_NAKED (arm_current_func_type ());
}
2034 /* Output assembler code for a block containing the constant parts
2035 of a trampoline, leaving space for the variable parts.
2037 On the ARM, (if r8 is the static chain regnum, and remembering that
2038 referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
2042 .word function's address
2043 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2046 arm_asm_trampoline_template (FILE *f)
2050 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2051 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2053 else if (TARGET_THUMB2)
      /* The Thumb-2 trampoline is similar to the ARM implementation.
	 Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
2057 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2058 STATIC_CHAIN_REGNUM, PC_REGNUM);
2059 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2063 ASM_OUTPUT_ALIGN (f, 2);
2064 fprintf (f, "\t.code\t16\n");
2065 fprintf (f, ".Ltrampoline_start:\n");
2066 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2067 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2068 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2069 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2070 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2071 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2073 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2074 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2077 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2080 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2082 rtx fnaddr, mem, a_tramp;
2084 emit_block_move (m_tramp, assemble_trampoline_template (),
2085 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2087 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2088 emit_move_insn (mem, chain_value);
2090 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2091 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2092 emit_move_insn (mem, fnaddr);
2094 a_tramp = XEXP (m_tramp, 0);
2095 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2096 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2097 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2100 /* Thumb trampolines should be entered in thumb mode, so set
2101 the bottom bit of the address. */
2104 arm_trampoline_adjust_address (rtx addr)
2107 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2108 NULL, 0, OPTAB_LIB_WIDEN);
2112 /* Return 1 if it is possible to return using a single instruction.
2113 If SIBLING is non-null, this is a test for a return before a sibling
2114 call. SIBLING is the call insn, so we can examine its register usage. */
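/* As a concrete illustration (assumed, not taken from this file): an
   ARM-mode function that pushed {r4, lr} on entry and has no stack
   adjustment left to undo can return with the single instruction

	ldmfd	sp!, {r4, pc}

   whereas a function that saved registers without lr, or that must
   also unwind the stack, needs more than one instruction and therefore
   fails the tests below.  */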
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
2120 unsigned int func_type;
2121 unsigned long saved_int_regs;
2122 unsigned HOST_WIDE_INT stack_adjust;
2123 arm_stack_offsets *offsets;
2125 /* Never use a return instruction before reload has run. */
  if (!reload_completed)
    return 0;
2129 func_type = arm_current_func_type ();
  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;
2136 /* So do interrupt functions that use the frame pointer and Thumb
2137 interrupt functions. */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2141 offsets = arm_get_frame_offsets ();
2142 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2144 /* As do variadic functions. */
2145 if (crtl->args.pretend_args_size
2146 || cfun->machine->uses_anonymous_args
2147 /* Or if the function calls __builtin_eh_return () */
2148 || crtl->calls_eh_return
2149 /* Or if the function calls alloca */
2150 || cfun->calls_alloca
2151 /* Or if there is a stack adjustment. However, if the stack pointer
2152 is saved on the stack, we can use a pre-incrementing stack load. */
2153 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
			     && stack_adjust == 4)))
    return 0;
2157 saved_int_regs = offsets->saved_regs_mask;
2159 /* Unfortunately, the insn
2161 ldmib sp, {..., sp, ...}
2163 triggers a bug on most SA-110 based devices, such that the stack
2164 pointer won't be correctly restored if the instruction takes a
2165 page fault. We work around this problem by popping r3 along with
2166 the other registers, since that is never slower than executing
2167 another instruction.
2169 We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
2172 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
2175 the default abi) ... */
      if (!call_used_regs[3])
	return 0;
2179 /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;
2183 /* ... or for a tail-call argument ... */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}
2192 /* ... and that there are no call-saved registers in r0-r2
2193 (always true in the default ABI). */
      if (saved_int_regs & 0x7)
	return 0;
    }
2198 /* Can't be done if interworking with Thumb, and any registers have been
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
2203 /* On StrongARM, conditional returns are expensive if they aren't
2204 taken and multiple registers have been stacked. */
2205 if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
2208 conditional-load instruction, that's not expensive. */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;
    }

  if (flag_pic
      && arm_pic_register != INVALID_REGNUM
      && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    return 0;
2218 /* If there are saved registers but the LR isn't saved, then we need
2219 two instructions for the return. */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;
2223 /* Can't be done if any of the FPA regs are pushed,
2224 since this also requires an insn. */
2225 if (TARGET_HARD_FLOAT && TARGET_FPA)
2226 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;
2230 /* Likewise VFP regs. */
2231 if (TARGET_HARD_FLOAT && TARGET_VFP)
2232 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;
2236 if (TARGET_REALLY_IWMMXT)
2237 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
2244 /* Return TRUE if int I is a valid immediate ARM constant. */
int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

2251 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2252 be all zero, or all one. */
2253 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2254 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2255 != ((~(unsigned HOST_WIDE_INT) 0)
	  & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;
2259 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2261 /* Fast return for 0 and small values. We must do this for zero, since
2262 the code below can't handle that one case. */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;
2266 /* Get the number of trailing zeros. */
2267 lowbit = ffs((int) i) - 1;
2269 /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  /* Allow rotated constants in ARM mode.  */
  if (TARGET_ARM
      && ((i & ~0xc000003f) == 0
2282 || (i & ~0xf000000f) == 0
	  || (i & ~0xfc000003) == 0))
    return TRUE;

  /* Thumb-2 also accepts replicated byte patterns.  */
  if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
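/* Some worked examples of the tests above (illustrative only):
   0x000000ff, 0x00000ff0 and 0xff000000 are all valid ARM immediates,
   each being an 8-bit value rotated right by an even count; 0x00000101
   and 0x00ff00ff are not valid in ARM mode, since no single 8-bit field
   covers their set bits; the Thumb-2 replicated patterns do accept
   0x00ff00ff and 0x01010101.  */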
2306 /* Return true if I is a valid constant for the operation CODE. */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	return 0;

    case PLUS:
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
2360 /* Emit a sequence of insns to handle a large constant.
2361 CODE is the code of the operation required, it can be any of SET, PLUS,
2362 IOR, AND, XOR, MINUS;
2363 MODE is the mode in which the operation is being performed;
2364 VAL is the integer to operate on;
2365 SOURCE is the other operand (a register, or a null-pointer for SET);
2366 SUBTARGETS means it is safe to create scratch registers if that will
2367 either produce a simpler sequence, or we will want to cse the values.
2368 Return value is the number of insns emitted. */
2370 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

2377 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

2382 if (subtargets || code == SET
2383 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2384 && REGNO (target) != REGNO (source)))
2386 /* After arm_reorg has been called, we can't fix up expensive
2387 constants by pushing them into memory so we must synthesize
2388 them in-line, regardless of the cost. This is only likely to
2389 be more costly on chips that have load delay slots and we are
2390 compiling without running the scheduler (so no splitting
2391 occurred before the final instruction emission).
2393 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2395 if (!after_arm_reorg
2397 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2399 > (arm_constant_limit (optimize_function_for_size_p (cfun))
	  /* Currently SET is the only monadic value for CODE; all
	     the rest are dyadic.  */
2406 if (TARGET_USE_MOVT)
2407 arm_emit_movpair (target, GEN_INT (val));
2409 emit_set_insn (target, GEN_INT (val));
2415 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2417 if (TARGET_USE_MOVT)
2418 arm_emit_movpair (temp, GEN_INT (val));
2420 emit_set_insn (temp, GEN_INT (val));
2422 /* For MINUS, the value is subtracted from, since we never
2423 have subtraction of a constant. */
2425 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2427 emit_set_insn (target,
2428 gen_rtx_fmt_ee (code, mode, source, temp));
  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
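/* As an illustrative example (not a call site from this file): with
   code == IOR, synthesizing r0 = r1 | 0x00ffff00 takes two insns,

	orr	r0, r1, #0x00ff0000
	orr	r0, r0, #0x0000ff00

   because 0x00ffff00 is not itself an 8-bit value rotated by an even
   amount, but each half of the split is.  */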
2438 /* Return the number of instructions required to synthesize the given
2439 constant, if we start emitting them from bit-position I. */
2441 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2443 HOST_WIDE_INT temp1;
2444 int step_size = TARGET_ARM ? 2 : 1;
2447 gcc_assert (TARGET_ARM || i == 0);
2455 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2460 temp1 = remainder & ((0x0ff << end)
2461 | ((i < end) ? (0xff >> (32 - end)) : 0));
2462 remainder &= ~temp1;
2467 } while (remainder);
2472 find_best_start (unsigned HOST_WIDE_INT remainder)
2474 int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom.  */
  if (!TARGET_ARM)
    return 0;

2483 for (i = 0; i < 32; i += 2)
2485 int consecutive_zeros = 0;
2487 if (!(remainder & (3 << i)))
2489 while ((i < 32) && !(remainder & (3 << i)))
2491 consecutive_zeros += 2;
2494 if (consecutive_zeros > best_consecutive_zeros)
2496 best_consecutive_zeros = consecutive_zeros;
2497 best_start = i - consecutive_zeros;
2503 /* So long as it won't require any more insns to do so, it's
2504 desirable to emit a small constant (in bits 0...9) in the last
2505 insn. This way there is more chance that it can be combined with
2506 a later addressing insn to form a pre-indexed load or store
2507 operation. Consider:
2509 *((volatile int *)0xe0000100) = 1;
2510 *((volatile int *)0xe0000110) = 2;
2512 We want this to wind up as:
2516 str rB, [rA, #0x100]
2518 str rB, [rA, #0x110]
2520 rather than having to synthesize both large constants from scratch.
2522 Therefore, we calculate how many insns would be required to emit
2523 the constant starting from `best_start', and also starting from
2524 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2525 yield a shorter sequence, we may as well use zero. */
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
      && (count_insns_for_constant (remainder, 0)
	  <= count_insns_for_constant (remainder, best_start)))
    best_start = 0;

  return best_start;
}
2535 /* Emit an instruction with the indicated PATTERN. If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
static void
2540 emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);

  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
2549 /* ??? This needs more work for thumb2. */
static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
2558 int final_invert = 0;
2559 int can_negate_initial = 0;
2561 int num_bits_set = 0;
2562 int set_sign_bit_copies = 0;
2563 int clear_sign_bit_copies = 0;
2564 int clear_zero_bit_copies = 0;
2565 int set_zero_bit_copies = 0;
2567 unsigned HOST_WIDE_INT temp1, temp2;
2568 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2569 int step_size = TARGET_ARM ? 2 : 1;
2571 /* Find out which operations are safe for a given CODE. Also do a quick
2572 check for degenerate cases; these can occur when DImode operations
2583 can_negate_initial = 1;
2587 if (remainder == 0xffffffff)
2590 emit_constant_insn (cond,
2591 gen_rtx_SET (VOIDmode, target,
2592 GEN_INT (ARM_SIGN_EXTEND (val))));
2598 if (reload_completed && rtx_equal_p (target, source))
2602 emit_constant_insn (cond,
2603 gen_rtx_SET (VOIDmode, target, source));
2612 emit_constant_insn (cond,
2613 gen_rtx_SET (VOIDmode, target, const0_rtx));
2616 if (remainder == 0xffffffff)
2618 if (reload_completed && rtx_equal_p (target, source))
2621 emit_constant_insn (cond,
2622 gen_rtx_SET (VOIDmode, target, source));
2631 if (reload_completed && rtx_equal_p (target, source))
2634 emit_constant_insn (cond,
2635 gen_rtx_SET (VOIDmode, target, source));
2639 if (remainder == 0xffffffff)
2642 emit_constant_insn (cond,
2643 gen_rtx_SET (VOIDmode, target,
2644 gen_rtx_NOT (mode, source)));
2650 /* We treat MINUS as (val - source), since (source - val) is always
2651 passed as (source + (-val)). */
2655 emit_constant_insn (cond,
2656 gen_rtx_SET (VOIDmode, target,
2657 gen_rtx_NEG (mode, source)));
2660 if (const_ok_for_arm (val))
2663 emit_constant_insn (cond,
2664 gen_rtx_SET (VOIDmode, target,
2665 gen_rtx_MINUS (mode, GEN_INT (val),
2677 /* If we can do it in one insn get out quickly. */
2678 if (const_ok_for_op (val, code))
2681 emit_constant_insn (cond,
2682 gen_rtx_SET (VOIDmode, target,
2684 ? gen_rtx_fmt_ee (code, mode, source,
2690 /* Calculate a few attributes that may be useful for specific
2692 /* Count number of leading zeros. */
2693 for (i = 31; i >= 0; i--)
2695 if ((remainder & (1 << i)) == 0)
2696 clear_sign_bit_copies++;
2701 /* Count number of leading 1's. */
2702 for (i = 31; i >= 0; i--)
2704 if ((remainder & (1 << i)) != 0)
2705 set_sign_bit_copies++;
2710 /* Count number of trailing zero's. */
2711 for (i = 0; i <= 31; i++)
2713 if ((remainder & (1 << i)) == 0)
2714 clear_zero_bit_copies++;
2719 /* Count number of trailing 1's. */
2720 for (i = 0; i <= 31; i++)
2722 if ((remainder & (1 << i)) != 0)
2723 set_zero_bit_copies++;
2731 /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
2733 may well merge into a subsequent insn. */
2734 if (set_sign_bit_copies > 1)
2736 if (const_ok_for_arm
2737 (temp1 = ARM_SIGN_EXTEND (remainder
2738 << (set_sign_bit_copies - 1))))
2742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2743 emit_constant_insn (cond,
2744 gen_rtx_SET (VOIDmode, new_src,
2746 emit_constant_insn (cond,
2747 gen_ashrsi3 (target, new_src,
2748 GEN_INT (set_sign_bit_copies - 1)));
2752 /* For an inverted constant, we will need to set the low bits,
2753 these will be shifted out of harm's way. */
2754 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2755 if (const_ok_for_arm (~temp1))
2759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2760 emit_constant_insn (cond,
2761 gen_rtx_SET (VOIDmode, new_src,
2763 emit_constant_insn (cond,
2764 gen_ashrsi3 (target, new_src,
2765 GEN_INT (set_sign_bit_copies - 1)));
2771 /* See if we can calculate the value as the difference between two
2772 valid immediates. */
2773 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2775 int topshift = clear_sign_bit_copies & ~1;
2777 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2778 & (0xff000000 >> topshift));
2780 /* If temp1 is zero, then that means the 9 most significant
2781 bits of remainder were 1 and we've caused it to overflow.
2782 When topshift is 0 we don't need to do anything since we
2783 can borrow from 'bit 32'. */
2784 if (temp1 == 0 && topshift != 0)
2785 temp1 = 0x80000000 >> (topshift - 1);
2787 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2789 if (const_ok_for_arm (temp2))
2793 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2794 emit_constant_insn (cond,
2795 gen_rtx_SET (VOIDmode, new_src,
2797 emit_constant_insn (cond,
2798 gen_addsi3 (target, new_src,
2806 /* See if we can generate this by setting the bottom (or the top)
2807 16 bits, and then shifting these into the other half of the
2808 word. We only look for the simplest cases, to do more would cost
2809 too much. Be careful, however, not to generate this when the
2810 alternative would take fewer insns. */
2811 if (val & 0xffff0000)
2813 temp1 = remainder & 0xffff0000;
2814 temp2 = remainder & 0x0000ffff;
2816 /* Overlaps outside this range are best done using other methods. */
2817 for (i = 9; i < 24; i++)
2819 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2820 && !const_ok_for_arm (temp2))
2822 rtx new_src = (subtargets
2823 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2825 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2826 source, subtargets, generate);
2834 gen_rtx_ASHIFT (mode, source,
2841 /* Don't duplicate cases already considered. */
2842 for (i = 17; i < 24; i++)
2844 if (((temp1 | (temp1 >> i)) == remainder)
2845 && !const_ok_for_arm (temp1))
2847 rtx new_src = (subtargets
2848 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2850 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2851 source, subtargets, generate);
2856 gen_rtx_SET (VOIDmode, target,
2859 gen_rtx_LSHIFTRT (mode, source,
2870 /* If we have IOR or XOR, and the constant can be loaded in a
2871 single instruction, and we can find a temporary to put it in,
2872 then this can be done in two instructions instead of 3-4. */
2874 /* TARGET can't be NULL if SUBTARGETS is 0 */
2875 || (reload_completed && !reg_mentioned_p (target, source)))
2877 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2881 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2883 emit_constant_insn (cond,
2884 gen_rtx_SET (VOIDmode, sub,
2886 emit_constant_insn (cond,
2887 gen_rtx_SET (VOIDmode, target,
2888 gen_rtx_fmt_ee (code, mode,
2899 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2900 and the remainder 0s for e.g. 0xfff00000)
2901 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate:
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
2909 if (set_sign_bit_copies > 8
2910 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2914 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2915 rtx shift = GEN_INT (set_sign_bit_copies);
2919 gen_rtx_SET (VOIDmode, sub,
2921 gen_rtx_ASHIFT (mode,
2926 gen_rtx_SET (VOIDmode, target,
2928 gen_rtx_LSHIFTRT (mode, sub,
2935 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2937 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
	 For example, r0 = r0 | 0xfff becomes:
		mvn	r0, r0, lsr #12
		mvn	r0, r0, asl #12  */
2944 if (set_zero_bit_copies > 8
2945 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2949 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2950 rtx shift = GEN_INT (set_zero_bit_copies);
2954 gen_rtx_SET (VOIDmode, sub,
2956 gen_rtx_LSHIFTRT (mode,
2961 gen_rtx_SET (VOIDmode, target,
2963 gen_rtx_ASHIFT (mode, sub,
2969 /* This will never be reached for Thumb2 because orn is a valid
2970 instruction. This is for Thumb1 and the ARM 32 bit cases.
2972 x = y | constant (such that ~constant is a valid constant)
2974 x = ~(~y & ~constant).
2976 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2980 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2981 emit_constant_insn (cond,
2982 gen_rtx_SET (VOIDmode, sub,
2983 gen_rtx_NOT (mode, source)));
2986 sub = gen_reg_rtx (mode);
2987 emit_constant_insn (cond,
2988 gen_rtx_SET (VOIDmode, sub,
2989 gen_rtx_AND (mode, source,
2991 emit_constant_insn (cond,
2992 gen_rtx_SET (VOIDmode, target,
2993 gen_rtx_NOT (mode, sub)));
3000 /* See if two shifts will do 2 or more insn's worth of work. */
3001 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3003 HOST_WIDE_INT shift_mask = ((0xffffffff
3004 << (32 - clear_sign_bit_copies))
3007 if ((remainder | shift_mask) != 0xffffffff)
3011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3012 insns = arm_gen_constant (AND, mode, cond,
3013 remainder | shift_mask,
3014 new_src, source, subtargets, 1);
3019 rtx targ = subtargets ? NULL_RTX : target;
3020 insns = arm_gen_constant (AND, mode, cond,
3021 remainder | shift_mask,
3022 targ, source, subtargets, 0);
3028 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3029 rtx shift = GEN_INT (clear_sign_bit_copies);
3031 emit_insn (gen_ashlsi3 (new_src, source, shift));
3032 emit_insn (gen_lshrsi3 (target, new_src, shift));
3038 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3040 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3042 if ((remainder | shift_mask) != 0xffffffff)
3046 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3048 insns = arm_gen_constant (AND, mode, cond,
3049 remainder | shift_mask,
3050 new_src, source, subtargets, 1);
3055 rtx targ = subtargets ? NULL_RTX : target;
3057 insns = arm_gen_constant (AND, mode, cond,
3058 remainder | shift_mask,
3059 targ, source, subtargets, 0);
3065 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3066 rtx shift = GEN_INT (clear_zero_bit_copies);
3068 emit_insn (gen_lshrsi3 (new_src, source, shift));
3069 emit_insn (gen_ashlsi3 (target, new_src, shift));
3081 for (i = 0; i < 32; i++)
	if (remainder & (1 << i))
	  num_bits_set++;
3085 if ((code == AND) || (can_invert && num_bits_set > 16))
3086 remainder ^= 0xffffffff;
3087 else if (code == PLUS && num_bits_set > 16)
3088 remainder = (-remainder) & 0xffffffff;
3090 /* For XOR, if more than half the bits are set and there's a sequence
3091 of more than 8 consecutive ones in the pattern then we can XOR by the
3092 inverted constant and then invert the final result; this may save an
3093 instruction and might also lead to the final mvn being merged with
3094 some other operation. */
3095 else if (code == XOR && num_bits_set > 16
3096 && (count_insns_for_constant (remainder ^ 0xffffffff,
3098 (remainder ^ 0xffffffff))
3099 < count_insns_for_constant (remainder,
3100 find_best_start (remainder))))
3102 remainder ^= 0xffffffff;
  /* Now try to find a way of doing the job in either two or three
     instructions.
3113 We start by looking for the largest block of zeros that are aligned on
3114 a 2-bit boundary, we then fill up the temps, wrapping around to the
3115 top of the word when we drop off the bottom.
3116 In the worst case this code should produce no more than four insns.
3117 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3118 best place to start. */
3120 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3123 /* Now start emitting the insns. */
3124 i = find_best_start (remainder);
3131 if (remainder & (3 << (i - 2)))
3136 temp1 = remainder & ((0x0ff << end)
3137 | ((i < end) ? (0xff >> (32 - end)) : 0));
3138 remainder &= ~temp1;
3142 rtx new_src, temp1_rtx;
3144 if (code == SET || code == MINUS)
3146 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3147 if (can_invert && code != MINUS)
3152 if ((final_invert || remainder) && subtargets)
3153 new_src = gen_reg_rtx (mode);
3158 else if (can_negate)
3162 temp1 = trunc_int_for_mode (temp1, mode);
3163 temp1_rtx = GEN_INT (temp1);
3167 else if (code == MINUS)
3168 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3170 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3172 emit_constant_insn (cond,
3173 gen_rtx_SET (VOIDmode, new_src,
3183 else if (code == MINUS)
3189 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3199 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3200 gen_rtx_NOT (mode, source)));
3207 /* Canonicalize a comparison so that we are more likely to recognize it.
3208 This can be done for a few constant compares, where we can make the
3209 immediate value easier to load. */
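/* For instance (an assumed example, not from this file): x <= 0x3ff
   cannot use a single cmp, because 0x3ff is not a valid ARM immediate,
   but the equivalent x < 0x400 can load its constant directly, so LE
   is adjusted to LT with *op1 incremented.  */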
3212 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3214 enum machine_mode mode;
3215 unsigned HOST_WIDE_INT i, maxval;
3217 mode = GET_MODE (*op0);
3218 if (mode == VOIDmode)
3219 mode = GET_MODE (*op1);
3221 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3223 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3224 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3225 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3226 for GTU/LEU in Thumb mode. */
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
3233 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3236 if (code == GT || code == LE
3237 || (!TARGET_ARM && (code == GTU || code == LEU)))
	  /* Missing comparison.  First try to use an available
	     comparison.  */
3241 if (GET_CODE (*op1) == CONST_INT)
3249 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3251 *op1 = GEN_INT (i + 1);
3252 return code == GT ? GE : LT;
3257 if (i != ~((unsigned HOST_WIDE_INT) 0)
3258 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3260 *op1 = GEN_INT (i + 1);
3261 return code == GTU ? GEU : LTU;
3269 /* If that did not work, reverse the condition. */
3273 return swap_condition (code);
3279 /* Comparisons smaller than DImode. Only adjust comparisons against
3280 an out-of-range constant. */
3281 if (GET_CODE (*op1) != CONST_INT
3282 || const_ok_for_arm (INTVAL (*op1))
3283 || const_ok_for_arm (- INTVAL (*op1)))
3297 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3299 *op1 = GEN_INT (i + 1);
3300 return code == GT ? GE : LT;
3307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3309 *op1 = GEN_INT (i - 1);
3310 return code == GE ? GT : LE;
3316 if (i != ~((unsigned HOST_WIDE_INT) 0)
3317 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3319 *op1 = GEN_INT (i + 1);
3320 return code == GTU ? GEU : LTU;
3327 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3329 *op1 = GEN_INT (i - 1);
3330 return code == GEU ? GTU : LEU;
3342 /* Define how to find the value returned by a function. */
3345 arm_function_value(const_tree type, const_tree func,
3346 bool outgoing ATTRIBUTE_UNUSED)
3348 enum machine_mode mode;
3349 int unsignedp ATTRIBUTE_UNUSED;
3350 rtx r ATTRIBUTE_UNUSED;
3352 mode = TYPE_MODE (type);
3354 if (TARGET_AAPCS_BASED)
3355 return aapcs_allocate_return_reg (mode, type, func);
3357 /* Promote integer types. */
3358 if (INTEGRAL_TYPE_P (type))
3359 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3361 /* Promotes small structs returned in a register to full-word size
3362 for big-endian AAPCS. */
3363 if (arm_return_in_msb (type))
3365 HOST_WIDE_INT size = int_size_in_bytes (type);
3366 if (size % UNITS_PER_WORD != 0)
3368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
  return LIBCALL_VALUE (mode);
}
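/* As an illustration of the big-endian promotion above (an assumed
   example): a 3-byte struct returned by value under big-endian AAPCS
   is widened to a 4-byte integer mode, keeping the data in the most
   significant bytes of r0 and the padding in the least significant
   byte.  */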
3377 libcall_eq (const void *p1, const void *p2)
3379 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3383 libcall_hash (const void *p1)
3385 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3389 add_libcall (htab_t htab, rtx libcall)
3391 *htab_find_slot (htab, libcall, INSERT) = libcall;
3395 arm_libcall_uses_aapcs_base (const_rtx libcall)
3397 static bool init_done = false;
3398 static htab_t libcall_htab;
3404 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3406 add_libcall (libcall_htab,
3407 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3408 add_libcall (libcall_htab,
3409 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3410 add_libcall (libcall_htab,
3411 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3412 add_libcall (libcall_htab,
3413 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3415 add_libcall (libcall_htab,
3416 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3417 add_libcall (libcall_htab,
3418 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3419 add_libcall (libcall_htab,
3420 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3421 add_libcall (libcall_htab,
3422 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3424 add_libcall (libcall_htab,
3425 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3426 add_libcall (libcall_htab,
3427 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3428 add_libcall (libcall_htab,
3429 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3430 add_libcall (libcall_htab,
3431 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3432 add_libcall (libcall_htab,
3433 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3434 add_libcall (libcall_htab,
3435 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3438 return libcall && htab_find (libcall_htab, libcall) != NULL;
3442 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3444 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3445 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3447 /* The following libcalls return their result in integer registers,
3448 even though they return a floating point value. */
3449 if (arm_libcall_uses_aapcs_base (libcall))
3450 return gen_rtx_REG (mode, ARG_REGISTER(1));
3454 return LIBCALL_VALUE (mode);
3457 /* Determine the amount of memory needed to store the possible return
3458 registers of an untyped call. */
3460 arm_apply_result_size (void)
3466 if (TARGET_HARD_FLOAT_ABI)
3472 if (TARGET_MAVERICK)
3475 if (TARGET_IWMMXT_ABI)
3482 /* Decide whether TYPE should be returned in memory (true)
3483 or in a register (false). FNTYPE is the type of the function making
3486 arm_return_in_memory (const_tree type, const_tree fntype)
3490 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3492 if (TARGET_AAPCS_BASED)
      /* Simple, non-aggregate types (i.e. not including vectors and
3495 complex) are always returned in a register (or registers).
3496 We don't care about which register here, so we can short-cut
3497 some of the detail. */
3498 if (!AGGREGATE_TYPE_P (type)
3499 && TREE_CODE (type) != VECTOR_TYPE
3500 && TREE_CODE (type) != COMPLEX_TYPE)
      /* Any return value that is no larger than one word can be
	 returned in r0.  */
3505 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3508 /* Check any available co-processors to see if they accept the
3509 type as a register candidate (VFP, for example, can return
3510 some aggregates in consecutive registers). These aren't
3511 available if the call is variadic. */
3512 if (aapcs_select_return_coproc (type, fntype) >= 0)
3515 /* Vector values should be returned using ARM registers, not
3516 memory (unless they're over 16 bytes, which will break since
3517 we only have four call-clobbered registers to play with). */
3518 if (TREE_CODE (type) == VECTOR_TYPE)
3519 return (size < 0 || size > (4 * UNITS_PER_WORD));
3521 /* The rest go in memory. */
3525 if (TREE_CODE (type) == VECTOR_TYPE)
3526 return (size < 0 || size > (4 * UNITS_PER_WORD));
3528 if (!AGGREGATE_TYPE_P (type) &&
3529 (TREE_CODE (type) != VECTOR_TYPE))
3530 /* All simple types are returned in registers. */
3533 if (arm_abi != ARM_ABI_APCS)
3535 /* ATPCS and later return aggregate types in memory only if they are
3536 larger than a word (or are variable size). */
3537 return (size < 0 || size > UNITS_PER_WORD);
3540 /* For the arm-wince targets we choose to be compatible with Microsoft's
3541 ARM and Thumb compilers, which always return aggregates in memory. */
3543 /* All structures/unions bigger than one word are returned in memory.
3544 Also catch the case where int_size_in_bytes returns -1. In this case
3545 the aggregate is either huge or of variable size, and in either case
3546 we will want to return it via memory and not in a register. */
3547 if (size < 0 || size > UNITS_PER_WORD)
3550 if (TREE_CODE (type) == RECORD_TYPE)
3554 /* For a struct the APCS says that we only return in a register
3555 if the type is 'integer like' and every addressable element
3556 has an offset of zero. For practical purposes this means
3557 that the structure can have at most one non bit-field element
3558 and that this element must be the first one in the structure. */
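  /* For example (an assumed illustration of the APCS rule): under the
     APCS, struct { int i; } is 'integer like' and is returned in r0,
     while struct { float f; } of the same size must be returned in
     memory, because its first field is a float.  */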
3560 /* Find the first field, ignoring non FIELD_DECL things which will
3561 have been created by C++. */
3562 for (field = TYPE_FIELDS (type);
3563 field && TREE_CODE (field) != FIELD_DECL;
3564 field = DECL_CHAIN (field))
3568 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3570 /* Check that the first field is valid for returning in a register. */
3572 /* ... Floats are not allowed */
3573 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3576 /* ... Aggregates that are not themselves valid for returning in
3577 a register are not allowed. */
3578 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3581 /* Now check the remaining fields, if any. Only bitfields are allowed,
3582 since they are not addressable. */
3583 for (field = DECL_CHAIN (field);
3585 field = DECL_CHAIN (field))
3587 if (TREE_CODE (field) != FIELD_DECL)
3590 if (!DECL_BIT_FIELD_TYPE (field))
3597 if (TREE_CODE (type) == UNION_TYPE)
3601 /* Unions can be returned in registers if every element is
3602 integral, or can be returned in an integer register. */
3603 for (field = TYPE_FIELDS (type);
3605 field = DECL_CHAIN (field))
3607 if (TREE_CODE (field) != FIELD_DECL)
3610 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3613 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3619 #endif /* not ARM_WINCE */
3621 /* Return all other types in memory. */
3625 /* Indicate whether or not words of a double are in big-endian order. */
3628 arm_float_words_big_endian (void)
3630 if (TARGET_MAVERICK)
3633 /* For FPA, float words are always big-endian. For VFP, floats words
3634 follow the memory system mode. */
3642 return (TARGET_BIG_END ? 1 : 0);
3647 const struct pcs_attribute_arg
3651 } pcs_attribute_args[] =
3653 {"aapcs", ARM_PCS_AAPCS},
3654 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3656 /* We could recognize these, but changes would be needed elsewhere
3657 * to implement them. */
3658 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3659 {"atpcs", ARM_PCS_ATPCS},
3660 {"apcs", ARM_PCS_APCS},
3662 {NULL, ARM_PCS_UNKNOWN}
3666 arm_pcs_from_attribute (tree attr)
3668 const struct pcs_attribute_arg *ptr;
3671 /* Get the value of the argument. */
3672 if (TREE_VALUE (attr) == NULL_TREE
3673 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3674 return ARM_PCS_UNKNOWN;
3676 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3678 /* Check it against the list of known arguments. */
3679 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3680 if (streq (arg, ptr->arg))
  /* An unrecognized PCS variant.  */
3684 return ARM_PCS_UNKNOWN;
3687 /* Get the PCS variant to use for this call. TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
3689 the call could be indirect or if this is a library call. */
3691 arm_get_pcs_model (const_tree type, const_tree decl)
3693 bool user_convention = false;
3694 enum arm_pcs user_pcs = arm_pcs_default;
3699 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3702 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3703 user_convention = true;
3706 if (TARGET_AAPCS_BASED)
3708 /* Detect varargs functions. These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
3711 bool base_rules = stdarg_p (type);
3713 if (user_convention)
3715 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3716 sorry ("non-AAPCS derived PCS variant");
3717 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3718 error ("variadic functions must use the base AAPCS variant");
3722 return ARM_PCS_AAPCS;
3723 else if (user_convention)
3725 else if (decl && flag_unit_at_a_time)
3727 /* Local functions never leak outside this compilation unit,
	 so we are free to use whatever conventions are
	 appropriate.  */
3730 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3731 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3733 return ARM_PCS_AAPCS_LOCAL;
3736 else if (user_convention && user_pcs != arm_pcs_default)
3737 sorry ("PCS variant");
3739 /* For everything else we use the target's default. */
3740 return arm_pcs_default;
3745 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3746 const_tree fntype ATTRIBUTE_UNUSED,
3747 rtx libcall ATTRIBUTE_UNUSED,
3748 const_tree fndecl ATTRIBUTE_UNUSED)
3750 /* Record the unallocated VFP registers. */
3751 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3752 pcum->aapcs_vfp_reg_alloc = 0;
3755 /* Walk down the type tree of TYPE counting consecutive base elements.
3756 If *MODEP is VOIDmode, then set it to the first valid floating point
3757 type. If a non-floating point type is found, or if a floating point
3758 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3759 otherwise return the count in the sub-tree. */
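/* For example (illustrative, not from this file): for
   struct s { float x, y, z; } this returns 3 with *modep == SFmode,
   making it a candidate for three consecutive VFP registers, whereas
   struct t { float f; double d; } returns -1 because the element modes
   disagree.  */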
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
3763 enum machine_mode mode;
3766 switch (TREE_CODE (type))
3769 mode = TYPE_MODE (type);
3770 if (mode != DFmode && mode != SFmode)
3773 if (*modep == VOIDmode)
3782 mode = TYPE_MODE (TREE_TYPE (type));
3783 if (mode != DFmode && mode != SFmode)
3786 if (*modep == VOIDmode)
3795 /* Use V2SImode and V4SImode as representatives of all 64-bit
3796 and 128-bit vector types, whether or not those modes are
3797 supported with the present options. */
3798 size = int_size_in_bytes (type);
3811 if (*modep == VOIDmode)
3814 /* Vector modes are considered to be opaque: two vectors are
3815 equivalent for the purposes of being homogeneous aggregates
3816 if they are the same size. */
3825 tree index = TYPE_DOMAIN (type);
3827 /* Can't handle incomplete types. */
3828 if (!COMPLETE_TYPE_P(type))
3831 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3834 || !TYPE_MAX_VALUE (index)
3835 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3836 || !TYPE_MIN_VALUE (index)
3837 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3841 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3842 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3844 /* There must be no padding. */
3845 if (!host_integerp (TYPE_SIZE (type), 1)
3846 || (tree_low_cst (TYPE_SIZE (type), 1)
3847 != count * GET_MODE_BITSIZE (*modep)))
3859 /* Can't handle incomplete types. */
3860 if (!COMPLETE_TYPE_P(type))
3863 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3865 if (TREE_CODE (field) != FIELD_DECL)
3868 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3874 /* There must be no padding. */
3875 if (!host_integerp (TYPE_SIZE (type), 1)
3876 || (tree_low_cst (TYPE_SIZE (type), 1)
3877 != count * GET_MODE_BITSIZE (*modep)))
3884 case QUAL_UNION_TYPE:
3886 /* These aren't very interesting except in a degenerate case. */
3891 /* Can't handle incomplete types. */
3892 if (!COMPLETE_TYPE_P(type))
3895 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3897 if (TREE_CODE (field) != FIELD_DECL)
3900 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3903 count = count > sub_count ? count : sub_count;
3906 /* There must be no padding. */
3907 if (!host_integerp (TYPE_SIZE (type), 1)
3908 || (tree_low_cst (TYPE_SIZE (type), 1)
3909 != count * GET_MODE_BITSIZE (*modep)))
3922 /* Return true if PCS_VARIANT should use VFP registers. */
3924 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3926 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3928 static bool seen_thumb1_vfp = false;
3930 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3932 sorry ("Thumb-1 hard-float VFP ABI");
3933 /* sorry() is not immediately fatal, so only display this once. */
3934 seen_thumb1_vfp = true;
3940 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3943 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3944 (TARGET_VFP_DOUBLE || !is_double));
3948 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3949 enum machine_mode mode, const_tree type,
3950 enum machine_mode *base_mode, int *count)
3952 enum machine_mode new_mode = VOIDmode;
3954 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3955 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3956 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3961 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3964 new_mode = (mode == DCmode ? DFmode : SFmode);
3966 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3968 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3970 if (ag_count > 0 && ag_count <= 4)
3979 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3982 *base_mode = new_mode;
3987 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3988 enum machine_mode mode, const_tree type)
3990 int count ATTRIBUTE_UNUSED;
3991 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3993 if (!use_vfp_abi (pcs_variant, false))
3995 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4000 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4003 if (!use_vfp_abi (pcum->pcs_variant, false))
4006 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4007 &pcum->aapcs_vfp_rmode,
4008 &pcum->aapcs_vfp_rcount);
4012 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4013 const_tree type ATTRIBUTE_UNUSED)
4015 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4016 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4019 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4020 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4022 pcum->aapcs_vfp_reg_alloc = mask << regno;
4023 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4026 int rcount = pcum->aapcs_vfp_rcount;
4028 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4032 /* Avoid using unsupported vector modes. */
4033 if (rmode == V2SImode)
4035 else if (rmode == V4SImode)
4042 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4043 for (i = 0; i < rcount; i++)
4045 rtx tmp = gen_rtx_REG (rmode,
4046 FIRST_VFP_REGNUM + regno + i * rshift);
4047 tmp = gen_rtx_EXPR_LIST
4049 GEN_INT (i * GET_MODE_SIZE (rmode)));
4050 XVECEXP (par, 0, i) = tmp;
4053 pcum->aapcs_reg = par;
4056 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4063 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4064 enum machine_mode mode,
4065 const_tree type ATTRIBUTE_UNUSED)
4067 if (!use_vfp_abi (pcs_variant, false))
4070 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4073 enum machine_mode ag_mode;
4078 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4083 if (ag_mode == V2SImode)
4085 else if (ag_mode == V4SImode)
4091 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4092 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4093 for (i = 0; i < count; i++)
4095 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4096 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4097 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4098 XVECEXP (par, 0, i) = tmp;
4104 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4108 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4109 enum machine_mode mode ATTRIBUTE_UNUSED,
4110 const_tree type ATTRIBUTE_UNUSED)
4112 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4113 pcum->aapcs_vfp_reg_alloc = 0;
4117 #define AAPCS_CP(X) \
4119 aapcs_ ## X ## _cum_init, \
4120 aapcs_ ## X ## _is_call_candidate, \
4121 aapcs_ ## X ## _allocate, \
4122 aapcs_ ## X ## _is_return_candidate, \
4123 aapcs_ ## X ## _allocate_return_reg, \
4124 aapcs_ ## X ## _advance \
/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
4135 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4136 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4138 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4139 BLKmode) is a candidate for this co-processor's registers; this
4140 function should ignore any position-dependent state in
4141 CUMULATIVE_ARGS and only use call-type dependent information. */
4142 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4144 /* Return true if the argument does get a co-processor register; it
4145 should set aapcs_reg to an RTX of the register allocated as is
4146 required for a return from FUNCTION_ARG. */
4147 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4149 /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
4151 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4153 /* Allocate and return an RTX element to hold the return type of a
     call; this routine must not fail and will only be called if
4155 is_return_candidate returned true with the same parameters. */
4156 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
  /* Finish processing this argument and prepare to start processing
     the next one.  */
4160 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4161 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4169 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4174 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4175 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4182 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4184 /* We aren't passed a decl, so we can't check that a call is local.
4185 However, it isn't clear that that would be a win anyway, since it
4186 might limit some tail-calling opportunities. */
4187 enum arm_pcs pcs_variant;
4191 const_tree fndecl = NULL_TREE;
4193 if (TREE_CODE (fntype) == FUNCTION_DECL)
4196 fntype = TREE_TYPE (fntype);
4199 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4202 pcs_variant = arm_pcs_default;
4204 if (pcs_variant != ARM_PCS_AAPCS)
4208 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4209 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4218 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4221 /* We aren't passed a decl, so we can't check that a call is local.
4222 However, it isn't clear that that would be a win anyway, since it
4223 might limit some tail-calling opportunities. */
4224 enum arm_pcs pcs_variant;
4225 int unsignedp ATTRIBUTE_UNUSED;
4229 const_tree fndecl = NULL_TREE;
4231 if (TREE_CODE (fntype) == FUNCTION_DECL)
4234 fntype = TREE_TYPE (fntype);
4237 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4240 pcs_variant = arm_pcs_default;
4242 /* Promote integer types. */
4243 if (type && INTEGRAL_TYPE_P (type))
4244 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4246 if (pcs_variant != ARM_PCS_AAPCS)
4250 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4251 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4253 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4257 /* Promotes small structs returned in a register to full-word size
4258 for big-endian AAPCS. */
4259 if (type && arm_return_in_msb (type))
4261 HOST_WIDE_INT size = int_size_in_bytes (type);
4262 if (size % UNITS_PER_WORD != 0)
4264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4269 return gen_rtx_REG (mode, R0_REGNUM);
4273 aapcs_libcall_value (enum machine_mode mode)
4275 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4278 /* Lay out a function argument using the AAPCS rules. The rule
4279 numbers referred to here are those in the AAPCS. */
4281 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4282 const_tree type, bool named)
4287 /* We only need to do this once per argument. */
4288 if (pcum->aapcs_arg_processed)
4291 pcum->aapcs_arg_processed = true;
4293 /* Special case: if named is false then we are handling an incoming
4294 anonymous argument which is on the stack. */
4298 /* Is this a potential co-processor register candidate? */
4299 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4301 int slot = aapcs_select_call_coproc (pcum, mode, type);
4302 pcum->aapcs_cprc_slot = slot;
4304 /* We don't have to apply any of the rules from part B of the
4305 preparation phase, these are handled elsewhere in the
4310 /* A co-processor register candidate goes either in its own
4311 class of registers or on the stack. */
4312 if (!pcum->aapcs_cprc_failed[slot])
4314 /* C1.cp - Try to allocate the argument to co-processor
4316 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4319 /* C2.cp - Put the argument on the stack and note that we
4320 can't assign any more candidates in this slot. We also
4321 need to note that we have allocated stack space, so that
4322 we won't later try to split a non-cprc candidate between
4323 core registers and the stack. */
4324 pcum->aapcs_cprc_failed[slot] = true;
4325 pcum->can_split = false;
4328 /* We didn't get a register, so this argument goes on the
4330 gcc_assert (pcum->can_split == false);
4335 /* C3 - For double-word aligned arguments, round the NCRN up to the
4336 next even number. */
4337 ncrn = pcum->aapcs_ncrn;
4338 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4341 nregs = ARM_NUM_REGS2 (mode, type);
4343 /* Sigh, this test should really assert that nregs > 0, but a GCC
4344 extension allows empty structs and then gives them empty size; it
4345 then allows such a structure to be passed by value. For some of
4346 the code below we have to pretend that such an argument has
4347 non-zero size so that we 'locate' it correctly either in
4348 registers or on the stack. */
4349 gcc_assert (nregs >= 0);
4351 nregs2 = nregs ? nregs : 1;
4353 /* C4 - Argument fits entirely in core registers. */
4354 if (ncrn + nregs2 <= NUM_ARG_REGS)
4356 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4357 pcum->aapcs_next_ncrn = ncrn + nregs;
4361 /* C5 - Some core registers left and there are no arguments already
4362 on the stack: split this argument between the remaining core
4363 registers and the stack. */
4364 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4366 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4367 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4368 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4372 /* C6 - NCRN is set to 4. */
4373 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4375 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
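/* Illustrative walk-through of the rules above, for a hypothetical
   soft-float call with ARM_DOUBLEWORD_ALIGN:

       void f (int a, long long b, int c);

   a: C4 assigns r0 (ncrn 0 -> 1).
   b: DImode needs doubleword alignment, so C3 rounds ncrn up to 2 and
      C4 assigns the pair r2/r3 (ncrn 2 -> 4).
   c: no core registers remain, so C6 leaves ncrn at 4 and C7/C8 place
      it on the stack. */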
4379 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4380 for a call to a function whose data type is FNTYPE.
4381 For a library call, FNTYPE is NULL. */
4383 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4385 tree fndecl ATTRIBUTE_UNUSED)
4387 /* Long call handling. */
4389 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4391 pcum->pcs_variant = arm_pcs_default;
4393 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4395 if (arm_libcall_uses_aapcs_base (libname))
4396 pcum->pcs_variant = ARM_PCS_AAPCS;
4398 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4399 pcum->aapcs_reg = NULL_RTX;
4400 pcum->aapcs_partial = 0;
4401 pcum->aapcs_arg_processed = false;
4402 pcum->aapcs_cprc_slot = -1;
4403 pcum->can_split = true;
4405 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4409 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4411 pcum->aapcs_cprc_failed[i] = false;
4412 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4420 /* On the ARM, the offset starts at 0. */
4422 pcum->iwmmxt_nregs = 0;
4423 pcum->can_split = true;
4425 /* Varargs vectors are treated the same as long long.
4426 named_count avoids having to change the way arm handles 'named'. */
4427 pcum->named_count = 0;
4430 if (TARGET_REALLY_IWMMXT && fntype)
4434 for (fn_arg = TYPE_ARG_TYPES (fntype);
4436 fn_arg = TREE_CHAIN (fn_arg))
4437 pcum->named_count += 1;
4439 if (! pcum->named_count)
4440 pcum->named_count = INT_MAX;
4445 /* Return true if mode/type need doubleword alignment. */
4447 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4449 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4450 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
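/* For example, DImode's 64-bit alignment exceeds the 32-bit
   PARM_BOUNDARY, as does a type carrying __attribute__ ((aligned (8))),
   so both answer true and get the even-numbered-register treatment in
   aapcs_layout_arg above. */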
4454 /* Determine where to put an argument to a function.
4455 Value is zero to push the argument on the stack,
4456 or a hard register in which to store the argument.
4458 MODE is the argument's machine mode.
4459 TYPE is the data type of the argument (as a tree).
4460 This is null for libcalls where that information may
4461 not be available.
4462 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4463 the preceding args and about the function being called.
4464 NAMED is nonzero if this argument is a named parameter
4465 (otherwise it is an extra parameter matching an ellipsis).
4467 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4468 other arguments are passed on the stack. If (NAMED == 0) (which happens
4469 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4470 defined), say it is passed on the stack (function_prologue will
4471 indeed make it be passed on the stack if necessary). */
4474 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4475 const_tree type, bool named)
4479 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4480 a call insn (op3 of a call_value insn). */
4481 if (mode == VOIDmode)
4484 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4486 aapcs_layout_arg (pcum, mode, type, named);
4487 return pcum->aapcs_reg;
4490 /* Varargs vectors are treated the same as long long.
4491 named_count avoids having to change the way arm handles 'named'. */
4492 if (TARGET_IWMMXT_ABI
4493 && arm_vector_mode_supported_p (mode)
4494 && pcum->named_count > pcum->nargs + 1)
4496 if (pcum->iwmmxt_nregs <= 9)
4497 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4500 pcum->can_split = false;
4505 /* Put doubleword aligned quantities in even register pairs. */
4507 && ARM_DOUBLEWORD_ALIGN
4508 && arm_needs_doubleword_align (mode, type))
4511 /* Only allow splitting an arg between regs and memory if all preceding
4512 args were allocated to regs. For args passed by reference we only count
4513 the reference pointer. */
4514 if (pcum->can_split)
4517 nregs = ARM_NUM_REGS2 (mode, type);
4519 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4522 return gen_rtx_REG (mode, pcum->nregs);
4526 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4528 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4529 ? DOUBLEWORD_ALIGNMENT
4534 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4535 tree type, bool named)
4537 int nregs = pcum->nregs;
4539 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4541 aapcs_layout_arg (pcum, mode, type, named);
4542 return pcum->aapcs_partial;
4545 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4548 if (NUM_ARG_REGS > nregs
4549 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4551 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4556 /* Update the data in PCUM to advance over an argument
4557 of mode MODE and data type TYPE.
4558 (TYPE is null for libcalls where that information may not be available.) */
4561 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4562 const_tree type, bool named)
4564 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4566 aapcs_layout_arg (pcum, mode, type, named);
4568 if (pcum->aapcs_cprc_slot >= 0)
4570 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4572 pcum->aapcs_cprc_slot = -1;
4575 /* Generic stuff. */
4576 pcum->aapcs_arg_processed = false;
4577 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4578 pcum->aapcs_reg = NULL_RTX;
4579 pcum->aapcs_partial = 0;
4584 if (arm_vector_mode_supported_p (mode)
4585 && pcum->named_count > pcum->nargs
4586 && TARGET_IWMMXT_ABI)
4587 pcum->iwmmxt_nregs += 1;
4589 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4593 /* Variable sized types are passed by reference. This is a GCC
4594 extension to the ARM ABI. */
4597 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4598 enum machine_mode mode ATTRIBUTE_UNUSED,
4599 const_tree type, bool named ATTRIBUTE_UNUSED)
4601 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
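/* e.g. a parameter of variably modified type such as
   "struct { char buf[n]; }" has a TYPE_SIZE that is not an
   INTEGER_CST, so it travels by reference; a fixed
   "struct { char buf[8]; }" is still passed by value. */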
4604 /* Encode the current state of the #pragma [no_]long_calls. */
4607 OFF, /* No #pragma [no_]long_calls is in effect. */
4608 LONG, /* #pragma long_calls is in effect. */
4609 SHORT /* #pragma no_long_calls is in effect. */
4612 static arm_pragma_enum arm_pragma_long_calls = OFF;
4615 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4617 arm_pragma_long_calls = LONG;
4621 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4623 arm_pragma_long_calls = SHORT;
4627 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4629 arm_pragma_long_calls = OFF;
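/* Usage sketch of the three pragmas in user code:

       #pragma long_calls
       void far_away (void);    <- type gets "long_call"
       #pragma no_long_calls
       void nearby (void);      <- type gets "short_call"
       #pragma long_calls_off
       void whatever (void);    <- back to the command-line default

   The attribute itself is attached by arm_set_default_type_attributes
   below. */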
4632 /* Handle an attribute requiring a FUNCTION_DECL;
4633 arguments as in struct attribute_spec.handler. */
4635 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4636 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4638 if (TREE_CODE (*node) != FUNCTION_DECL)
4640 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4642 *no_add_attrs = true;
4648 /* Handle an "interrupt" or "isr" attribute;
4649 arguments as in struct attribute_spec.handler. */
4651 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4656 if (TREE_CODE (*node) != FUNCTION_DECL)
4658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4660 *no_add_attrs = true;
4662 /* FIXME: the argument if any is checked for type attributes;
4663 should it be checked for decl ones? */
4667 if (TREE_CODE (*node) == FUNCTION_TYPE
4668 || TREE_CODE (*node) == METHOD_TYPE)
4670 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4672 warning (OPT_Wattributes, "%qE attribute ignored",
4674 *no_add_attrs = true;
4677 else if (TREE_CODE (*node) == POINTER_TYPE
4678 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4679 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4680 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4682 *node = build_variant_type_copy (*node);
4683 TREE_TYPE (*node) = build_type_attribute_variant
4685 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4686 *no_add_attrs = true;
4690 /* Possibly pass this attribute on from the type to a decl. */
4691 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4692 | (int) ATTR_FLAG_FUNCTION_NEXT
4693 | (int) ATTR_FLAG_ARRAY_NEXT))
4695 *no_add_attrs = true;
4696 return tree_cons (name, args, NULL_TREE);
4700 warning (OPT_Wattributes, "%qE attribute ignored",
4709 /* Handle a "pcs" attribute; arguments as in struct
4710 attribute_spec.handler. */
4712 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4713 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4715 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4717 warning (OPT_Wattributes, "%qE attribute ignored", name);
4718 *no_add_attrs = true;
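/* For example:

       double fast_fma (double, double, double)
	   __attribute__ ((pcs ("aapcs-vfp")));

   selects the VFP variant of the AAPCS for calls through this type;
   arm_pcs_from_attribute recognizes "aapcs" and "aapcs-vfp". */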
4723 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4724 /* Handle the "notshared" attribute. This attribute is another way of
4725 requesting hidden visibility. ARM's compiler supports
4726 "__declspec(notshared)"; we support the same thing via an
4730 arm_handle_notshared_attribute (tree *node,
4731 tree name ATTRIBUTE_UNUSED,
4732 tree args ATTRIBUTE_UNUSED,
4733 int flags ATTRIBUTE_UNUSED,
4736 tree decl = TYPE_NAME (*node);
4740 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4741 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4742 *no_add_attrs = false;
4748 /* Return 0 if the attributes for two types are incompatible, 1 if they
4749 are compatible, and 2 if they are nearly compatible (which causes a
4750 warning to be generated). */
4752 arm_comp_type_attributes (const_tree type1, const_tree type2)
4756 /* Check for mismatch of non-default calling convention. */
4757 if (TREE_CODE (type1) != FUNCTION_TYPE)
4760 /* Check for mismatched call attributes. */
4761 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4762 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4763 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4764 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4766 /* Only bother to check if an attribute is defined. */
4767 if (l1 | l2 | s1 | s2)
4769 /* If one type has an attribute, the other must have the same attribute. */
4770 if ((l1 != l2) || (s1 != s2))
4773 /* Disallow mixed attributes. */
4774 if ((l1 & s2) || (l2 & s1))
4778 /* Check for mismatched ISR attribute. */
4779 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4781 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4782 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4784 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4791 /* Assigns default attributes to a newly defined type. This is used to
4792 set short_call/long_call attributes for function types of
4793 functions defined inside corresponding #pragma scopes. */
4795 arm_set_default_type_attributes (tree type)
4797 /* Add __attribute__ ((long_call)) to all functions when
4798 inside #pragma long_calls, or __attribute__ ((short_call))
4799 when inside #pragma no_long_calls. */
4800 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4802 tree type_attr_list, attr_name;
4803 type_attr_list = TYPE_ATTRIBUTES (type);
4805 if (arm_pragma_long_calls == LONG)
4806 attr_name = get_identifier ("long_call");
4807 else if (arm_pragma_long_calls == SHORT)
4808 attr_name = get_identifier ("short_call");
4812 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4813 TYPE_ATTRIBUTES (type) = type_attr_list;
4817 /* Return true if DECL is known to be linked into section SECTION. */
4820 arm_function_in_section_p (tree decl, section *section)
4822 /* We can only be certain about functions defined in the same
4823 compilation unit. */
4824 if (!TREE_STATIC (decl))
4827 /* Make sure that SYMBOL always binds to the definition in this
4828 compilation unit. */
4829 if (!targetm.binds_local_p (decl))
4832 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4833 if (!DECL_SECTION_NAME (decl))
4835 /* Make sure that we will not create a unique section for DECL. */
4836 if (flag_function_sections || DECL_ONE_ONLY (decl))
4840 return function_section (decl) == section;
4843 /* Return nonzero if a 32-bit "long_call" should be generated for
4844 a call from the current function to DECL. We generate a long_call
4847 a. has an __attribute__ ((long_call))
4848 or b. is within the scope of a #pragma long_calls
4849 or c. the -mlong-calls command line switch has been specified
4851 However we do not generate a long call if the function:
4853 d. has an __attribute__ ((short_call))
4854 or e. is inside the scope of a #pragma no_long_calls
4855 or f. is defined in the same section as the current function. */
4858 arm_is_long_call_p (tree decl)
4863 return TARGET_LONG_CALLS;
4865 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4866 if (lookup_attribute ("short_call", attrs))
4869 /* For "f", be conservative, and only cater for cases in which the
4870 whole of the current function is placed in the same section. */
4871 if (!flag_reorder_blocks_and_partition
4872 && TREE_CODE (decl) == FUNCTION_DECL
4873 && arm_function_in_section_p (decl, current_function_section ()))
4876 if (lookup_attribute ("long_call", attrs))
4879 return TARGET_LONG_CALLS;
4882 /* Return nonzero if it is ok to make a tail-call to DECL. */
4884 arm_function_ok_for_sibcall (tree decl, tree exp)
4886 unsigned long func_type;
4888 if (cfun->machine->sibcall_blocked)
4891 /* Never tailcall something for which we have no decl, or if we
4892 are generating code for Thumb-1. */
4893 if (decl == NULL || TARGET_THUMB1)
4896 /* The PIC register is live on entry to VxWorks PLT entries, so we
4897 must make the call before restoring the PIC register. */
4898 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4901 /* Cannot tail-call to long calls, since these are out of range of
4902 a branch instruction. */
4903 if (arm_is_long_call_p (decl))
4906 /* If we are interworking and the function is not declared static
4907 then we can't tail-call it unless we know that it exists in this
4908 compilation unit (since it might be a Thumb routine). */
4909 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4912 func_type = arm_current_func_type ();
4913 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4914 if (IS_INTERRUPT (func_type))
4917 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4919 /* Check that the return value locations are the same. For
4920 example that we aren't returning a value from the sibling in
4921 a VFP register but then need to transfer it to a core
4922 register. */
4925 a = arm_function_value (TREE_TYPE (exp), decl, false);
4926 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4928 if (!rtx_equal_p (a, b))
4932 /* Never tailcall if function may be called with a misaligned SP. */
4933 if (IS_STACKALIGN (func_type))
4936 /* Everything else is ok. */
4941 /* Addressing mode support functions. */
4943 /* Return nonzero if X is a legitimate immediate operand when compiling
4944 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4946 legitimate_pic_operand_p (rtx x)
4948 if (GET_CODE (x) == SYMBOL_REF
4949 || (GET_CODE (x) == CONST
4950 && GET_CODE (XEXP (x, 0)) == PLUS
4951 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4957 /* Record that the current function needs a PIC register. Initialize
4958 cfun->machine->pic_reg if we have not already done so. */
4961 require_pic_register (void)
4963 /* A lot of the logic here is made obscure by the fact that this
4964 routine gets called as part of the rtx cost estimation process.
4965 We don't want those calls to affect any assumptions about the real
4966 function; and further, we can't call entry_of_function() until we
4967 start the real expansion process. */
4968 if (!crtl->uses_pic_offset_table)
4970 gcc_assert (can_create_pseudo_p ());
4971 if (arm_pic_register != INVALID_REGNUM)
4973 if (!cfun->machine->pic_reg)
4974 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4976 /* Play games to avoid marking the function as needing pic
4977 if we are being called as part of the cost-estimation
4979 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4980 crtl->uses_pic_offset_table = 1;
4986 if (!cfun->machine->pic_reg)
4987 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4989 /* Play games to avoid marking the function as needing pic
4990 if we are being called as part of the cost-estimation
4992 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4994 crtl->uses_pic_offset_table = 1;
4997 arm_load_pic_register (0UL);
5002 for (insn = seq; insn; insn = NEXT_INSN (insn))
5004 INSN_LOCATOR (insn) = prologue_locator;
5006 /* We can be called during expansion of PHI nodes, where
5007 we can't yet emit instructions directly in the final
5008 insn stream. Queue the insns on the entry edge, they will
5009 be committed after everything else is expanded. */
5010 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5017 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5019 if (GET_CODE (orig) == SYMBOL_REF
5020 || GET_CODE (orig) == LABEL_REF)
5026 gcc_assert (can_create_pseudo_p ());
5027 reg = gen_reg_rtx (Pmode);
5030 /* VxWorks does not impose a fixed gap between segments; the run-time
5031 gap can be different from the object-file gap. We therefore can't
5032 use GOTOFF unless we are absolutely sure that the symbol is in the
5033 same segment as the GOT. Unfortunately, the flexibility of linker
5034 scripts means that we can't be sure of that in general, so assume
5035 that GOTOFF is never valid on VxWorks. */
5036 if ((GET_CODE (orig) == LABEL_REF
5037 || (GET_CODE (orig) == SYMBOL_REF &&
5038 SYMBOL_REF_LOCAL_P (orig)))
5040 && !TARGET_VXWORKS_RTP)
5041 insn = arm_pic_static_addr (orig, reg);
5047 /* If this function doesn't have a pic register, create one now. */
5048 require_pic_register ();
5050 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5052 /* Make the MEM as close to a constant as possible. */
5053 mem = SET_SRC (pat);
5054 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5055 MEM_READONLY_P (mem) = 1;
5056 MEM_NOTRAP_P (mem) = 1;
5058 insn = emit_insn (pat);
5061 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5062 by loop. */
5063 set_unique_reg_note (insn, REG_EQUAL, orig);
5067 else if (GET_CODE (orig) == CONST)
5071 if (GET_CODE (XEXP (orig, 0)) == PLUS
5072 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5075 /* Handle the case where we have: const (UNSPEC_TLS). */
5076 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5077 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5080 /* Handle the case where we have:
5081 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5082 CONST_INT. */
5083 if (GET_CODE (XEXP (orig, 0)) == PLUS
5084 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5085 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5087 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5093 gcc_assert (can_create_pseudo_p ());
5094 reg = gen_reg_rtx (Pmode);
5097 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5099 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5100 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5101 base == reg ? 0 : reg);
5103 if (GET_CODE (offset) == CONST_INT)
5105 /* The base register doesn't really matter, we only want to
5106 test the index for the appropriate mode. */
5107 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5109 gcc_assert (can_create_pseudo_p ());
5110 offset = force_reg (Pmode, offset);
5113 if (GET_CODE (offset) == CONST_INT)
5114 return plus_constant (base, INTVAL (offset));
5117 if (GET_MODE_SIZE (mode) > 4
5118 && (GET_MODE_CLASS (mode) == MODE_INT
5119 || TARGET_SOFT_FLOAT))
5121 emit_insn (gen_addsi3 (reg, base, offset));
5125 return gen_rtx_PLUS (Pmode, base, offset);
5132 /* Find a spare register to use during the prolog of a function. */
5135 thumb_find_work_register (unsigned long pushed_regs_mask)
5139 /* Check the argument registers first as these are call-used. The
5140 register allocation order means that sometimes r3 might be used
5141 but earlier argument registers might not, so check them all. */
5142 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5143 if (!df_regs_ever_live_p (reg))
5146 /* Before going on to check the call-saved registers we can try a couple
5147 more ways of deducing that r3 is available. The first is when we are
5148 pushing anonymous arguments onto the stack and we have less than 4
5149 registers worth of fixed arguments(*). In this case r3 will be part of
5150 the variable argument list and so we can be sure that it will be
5151 pushed right at the start of the function. Hence it will be available
5152 for the rest of the prologue.
5153 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5154 if (cfun->machine->uses_anonymous_args
5155 && crtl->args.pretend_args_size > 0)
5156 return LAST_ARG_REGNUM;
5158 /* The other case is when we have fixed arguments but less than 4 registers
5159 worth. In this case r3 might be used in the body of the function, but
5160 it is not being used to convey an argument into the function. In theory
5161 we could just check crtl->args.size to see how many bytes are
5162 being passed in argument registers, but it seems that it is unreliable.
5163 Sometimes it will have the value 0 when in fact arguments are being
5164 passed. (See testcase execute/20021111-1.c for an example). So we also
5165 check the args_info.nregs field as well. The problem with this field is
5166 that it makes no allowances for arguments that are passed to the
5167 function but which are not used. Hence we could miss an opportunity
5168 when a function has an unused argument in r3. But it is better to be
5169 safe than to be sorry. */
5170 if (! cfun->machine->uses_anonymous_args
5171 && crtl->args.size >= 0
5172 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5173 && crtl->args.info.nregs < 4)
5174 return LAST_ARG_REGNUM;
5176 /* Otherwise look for a call-saved register that is going to be pushed. */
5177 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5178 if (pushed_regs_mask & (1 << reg))
5183 /* Thumb-2 can use high regs. */
5184 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5185 if (pushed_regs_mask & (1 << reg))
5188 /* Something went wrong - thumb_compute_save_reg_mask()
5189 should have arranged for a suitable register to be pushed. */
5193 static GTY(()) int pic_labelno;
5195 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5196 low register. */
5199 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5201 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5203 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5206 gcc_assert (flag_pic);
5208 pic_reg = cfun->machine->pic_reg;
5209 if (TARGET_VXWORKS_RTP)
5211 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5212 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5213 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5215 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5217 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5218 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5222 /* We use an UNSPEC rather than a LABEL_REF because this label
5223 never appears in the code stream. */
5225 labelno = GEN_INT (pic_labelno++);
5226 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5227 l1 = gen_rtx_CONST (VOIDmode, l1);
5229 /* On the ARM the PC register contains 'dot + 8' at the time of the
5230 addition, on the Thumb it is 'dot + 4'. */
5231 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5232 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5234 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5238 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5240 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5242 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5244 else /* TARGET_THUMB1 */
5246 if (arm_pic_register != INVALID_REGNUM
5247 && REGNO (pic_reg) > LAST_LO_REGNUM)
5249 /* We will have pushed the pic register, so we should always be
5250 able to find a work register. */
5251 pic_tmp = gen_rtx_REG (SImode,
5252 thumb_find_work_register (saved_regs));
5253 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5254 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5257 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5258 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5262 /* Need to emit this whether or not we obey regdecls,
5263 since setjmp/longjmp can cause life info to screw up. */
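/* A sketch of the classic non-VxWorks ARM-state sequence this emits
   (Thumb differs only in the pc bias):

       ldr     rPIC, .LPICoffset
   .LPICn:
       add     rPIC, pc, rPIC          @ pc reads as .LPICn + 8 here
       ...
   .LPICoffset:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   leaving the GOT base in rPIC. */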
5267 /* Generate code to load the address of a static var when flag_pic is set. */
5269 arm_pic_static_addr (rtx orig, rtx reg)
5271 rtx l1, labelno, offset_rtx, insn;
5273 gcc_assert (flag_pic);
5275 /* We use an UNSPEC rather than a LABEL_REF because this label
5276 never appears in the code stream. */
5277 labelno = GEN_INT (pic_labelno++);
5278 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5279 l1 = gen_rtx_CONST (VOIDmode, l1);
5281 /* On the ARM the PC register contains 'dot + 8' at the time of the
5282 addition, on the Thumb it is 'dot + 4'. */
5283 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5284 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5285 UNSPEC_SYMBOL_OFFSET);
5286 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5290 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5292 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5294 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5296 else /* TARGET_THUMB1 */
5298 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5299 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5305 /* Return nonzero if X is valid as an ARM state addressing register. */
5307 arm_address_register_rtx_p (rtx x, int strict_p)
5311 if (GET_CODE (x) != REG)
5317 return ARM_REGNO_OK_FOR_BASE_P (regno);
5319 return (regno <= LAST_ARM_REGNUM
5320 || regno >= FIRST_PSEUDO_REGISTER
5321 || regno == FRAME_POINTER_REGNUM
5322 || regno == ARG_POINTER_REGNUM);
5325 /* Return TRUE if this rtx is the difference of a symbol and a label,
5326 and will reduce to a PC-relative relocation in the object file.
5327 Expressions like this can be left alone when generating PIC, rather
5328 than forced through the GOT. */
5330 pcrel_constant_p (rtx x)
5332 if (GET_CODE (x) == MINUS)
5333 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
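/* e.g. (minus (symbol_ref ("var")) (label_ref ".L42")) qualifies:
   the assembler can emit "var - .L42" as a pc-relative value itself,
   so no GOT traffic is needed even with flag_pic. */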
5338 /* Return true if X will surely end up in an index register after next
5339 splitting pass. */
5341 will_be_in_index_register (const_rtx x)
5343 /* arm.md: calculate_pic_address will split this into a register. */
5344 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5347 /* Return nonzero if X is a valid ARM state address operand. */
5349 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5353 enum rtx_code code = GET_CODE (x);
5355 if (arm_address_register_rtx_p (x, strict_p))
5358 use_ldrd = (TARGET_LDRD
5360 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5362 if (code == POST_INC || code == PRE_DEC
5363 || ((code == PRE_INC || code == POST_DEC)
5364 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5365 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5367 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5368 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5369 && GET_CODE (XEXP (x, 1)) == PLUS
5370 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5372 rtx addend = XEXP (XEXP (x, 1), 1);
5374 /* Don't allow ldrd post increment by register because it's hard
5375 to fixup invalid register choices. */
5377 && GET_CODE (x) == POST_MODIFY
5378 && GET_CODE (addend) == REG)
5381 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5382 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5385 /* After reload constants split into minipools will have addresses
5386 from a LABEL_REF. */
5387 else if (reload_completed
5388 && (code == LABEL_REF
5390 && GET_CODE (XEXP (x, 0)) == PLUS
5391 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5392 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5395 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5398 else if (code == PLUS)
5400 rtx xop0 = XEXP (x, 0);
5401 rtx xop1 = XEXP (x, 1);
5403 return ((arm_address_register_rtx_p (xop0, strict_p)
5404 && ((GET_CODE (xop1) == CONST_INT
5405 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5406 || (!strict_p && will_be_in_index_register (xop1))))
5407 || (arm_address_register_rtx_p (xop1, strict_p)
5408 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5412 /* Reload currently can't handle MINUS, so disable this for now */
5413 else if (GET_CODE (x) == MINUS)
5415 rtx xop0 = XEXP (x, 0);
5416 rtx xop1 = XEXP (x, 1);
5418 return (arm_address_register_rtx_p (xop0, strict_p)
5419 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5423 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5424 && code == SYMBOL_REF
5425 && CONSTANT_POOL_ADDRESS_P (x)
5427 && symbol_mentioned_p (get_pool_constant (x))
5428 && ! pcrel_constant_p (get_pool_constant (x))))
5434 /* Return nonzero if X is a valid Thumb-2 address operand. */
5436 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5439 enum rtx_code code = GET_CODE (x);
5441 if (arm_address_register_rtx_p (x, strict_p))
5444 use_ldrd = (TARGET_LDRD
5446 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5448 if (code == POST_INC || code == PRE_DEC
5449 || ((code == PRE_INC || code == POST_DEC)
5450 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5451 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5453 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5454 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5455 && GET_CODE (XEXP (x, 1)) == PLUS
5456 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5458 /* Thumb-2 only has autoincrement by constant. */
5459 rtx addend = XEXP (XEXP (x, 1), 1);
5460 HOST_WIDE_INT offset;
5462 if (GET_CODE (addend) != CONST_INT)
5465 offset = INTVAL (addend);
5466 if (GET_MODE_SIZE (mode) <= 4)
5467 return (offset > -256 && offset < 256);
5469 return (use_ldrd && offset > -1024 && offset < 1024
5470 && (offset & 3) == 0);
5473 /* After reload constants split into minipools will have addresses
5474 from a LABEL_REF. */
5475 else if (reload_completed
5476 && (code == LABEL_REF
5478 && GET_CODE (XEXP (x, 0)) == PLUS
5479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5483 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5486 else if (code == PLUS)
5488 rtx xop0 = XEXP (x, 0);
5489 rtx xop1 = XEXP (x, 1);
5491 return ((arm_address_register_rtx_p (xop0, strict_p)
5492 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5493 || (!strict_p && will_be_in_index_register (xop1))))
5494 || (arm_address_register_rtx_p (xop1, strict_p)
5495 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5498 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5499 && code == SYMBOL_REF
5500 && CONSTANT_POOL_ADDRESS_P (x)
5502 && symbol_mentioned_p (get_pool_constant (x))
5503 && ! pcrel_constant_p (get_pool_constant (x))))
5509 /* Return nonzero if INDEX is valid for an address index operand in
5512 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5515 HOST_WIDE_INT range;
5516 enum rtx_code code = GET_CODE (index);
5518 /* Standard coprocessor addressing modes. */
5519 if (TARGET_HARD_FLOAT
5520 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5521 && (mode == SFmode || mode == DFmode
5522 || (TARGET_MAVERICK && mode == DImode)))
5523 return (code == CONST_INT && INTVAL (index) < 1024
5524 && INTVAL (index) > -1024
5525 && (INTVAL (index) & 3) == 0);
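/* i.e. the 8-bit word-scaled coprocessor index, whose extremes are
   e.g. "vldr d0, [r0, #1020]" and "vldr d0, [r0, #-1020]"; unaligned
   or larger offsets must be folded into the base register first. */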
5527 /* For quad modes, we restrict the constant offset to be slightly less
5528 than what the instruction format permits. We do this because for
5529 quad mode moves, we will actually decompose them into two separate
5530 double-mode reads or writes. INDEX must therefore be a valid
5531 (double-mode) offset and so should INDEX+8. */
5532 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5533 return (code == CONST_INT
5534 && INTVAL (index) < 1016
5535 && INTVAL (index) > -1024
5536 && (INTVAL (index) & 3) == 0);
5538 /* We have no such constraint on double mode offsets, so we permit the
5539 full range of the instruction format. */
5540 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5541 return (code == CONST_INT
5542 && INTVAL (index) < 1024
5543 && INTVAL (index) > -1024
5544 && (INTVAL (index) & 3) == 0);
5546 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5547 return (code == CONST_INT
5548 && INTVAL (index) < 1024
5549 && INTVAL (index) > -1024
5550 && (INTVAL (index) & 3) == 0);
5552 if (arm_address_register_rtx_p (index, strict_p)
5553 && (GET_MODE_SIZE (mode) <= 4))
5556 if (mode == DImode || mode == DFmode)
5558 if (code == CONST_INT)
5560 HOST_WIDE_INT val = INTVAL (index);
5563 return val > -256 && val < 256;
5565 return val > -4096 && val < 4092;
5568 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5571 if (GET_MODE_SIZE (mode) <= 4
5575 || (mode == QImode && outer == SIGN_EXTEND))))
5579 rtx xiop0 = XEXP (index, 0);
5580 rtx xiop1 = XEXP (index, 1);
5582 return ((arm_address_register_rtx_p (xiop0, strict_p)
5583 && power_of_two_operand (xiop1, SImode))
5584 || (arm_address_register_rtx_p (xiop1, strict_p)
5585 && power_of_two_operand (xiop0, SImode)));
5587 else if (code == LSHIFTRT || code == ASHIFTRT
5588 || code == ASHIFT || code == ROTATERT)
5590 rtx op = XEXP (index, 1);
5592 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5593 && GET_CODE (op) == CONST_INT
5595 && INTVAL (op) <= 31);
5599 /* For ARM v4 we may be doing a sign-extend operation during the
5605 || (outer == SIGN_EXTEND && mode == QImode))
5611 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5613 return (code == CONST_INT
5614 && INTVAL (index) < range
5615 && INTVAL (index) > -range);
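/* So a plain SImode access ends up with the familiar [-4095, +4095]
   window: "ldr r0, [r1, #4095]" encodes directly, while #4096 has to
   be reached through a separate add or a shifted index register. */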
5618 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5619 index operand, i.e. 1, 2, 4 or 8. */
5621 thumb2_index_mul_operand (rtx op)
5625 if (GET_CODE (op) != CONST_INT)
5629 return (val == 1 || val == 2 || val == 4 || val == 8);
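/* e.g. (plus (mult rI 4) rB) is the canonical RTL for
   "ldr r0, [rB, rI, lsl #2]"; the factors 1/2/4/8 correspond exactly
   to the lsl #0..#3 shifts the Thumb-2 encoding provides. */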
5632 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5634 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5636 enum rtx_code code = GET_CODE (index);
5638 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5639 /* Standard coprocessor addressing modes. */
5640 if (TARGET_HARD_FLOAT
5641 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5642 && (mode == SFmode || mode == DFmode
5643 || (TARGET_MAVERICK && mode == DImode)))
5644 return (code == CONST_INT && INTVAL (index) < 1024
5645 /* Thumb-2 allows only > -256 index range for its core register
5646 load/stores. Since we allow SF/DF in core registers, we have
5647 to use the intersection between -256~4096 (core) and -1024~1024
5648 (coprocessor). */
5649 && INTVAL (index) > -256
5650 && (INTVAL (index) & 3) == 0);
5652 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5654 /* For DImode assume values will usually live in core regs
5655 and only allow LDRD addressing modes. */
5656 if (!TARGET_LDRD || mode != DImode)
5657 return (code == CONST_INT
5658 && INTVAL (index) < 1024
5659 && INTVAL (index) > -1024
5660 && (INTVAL (index) & 3) == 0);
5663 /* For quad modes, we restrict the constant offset to be slightly less
5664 than what the instruction format permits. We do this because for
5665 quad mode moves, we will actually decompose them into two separate
5666 double-mode reads or writes. INDEX must therefore be a valid
5667 (double-mode) offset and so should INDEX+8. */
5668 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5669 return (code == CONST_INT
5670 && INTVAL (index) < 1016
5671 && INTVAL (index) > -1024
5672 && (INTVAL (index) & 3) == 0);
5674 /* We have no such constraint on double mode offsets, so we permit the
5675 full range of the instruction format. */
5676 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1024
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 if (arm_address_register_rtx_p (index, strict_p)
5683 && (GET_MODE_SIZE (mode) <= 4))
5686 if (mode == DImode || mode == DFmode)
5688 if (code == CONST_INT)
5690 HOST_WIDE_INT val = INTVAL (index);
5691 /* ??? Can we assume ldrd for thumb2? */
5692 /* Thumb-2 ldrd only has reg+const addressing modes. */
5693 /* ldrd supports offsets of +-1020.
5694 However the ldr fallback does not. */
5695 return val > -256 && val < 256 && (val & 3) == 0;
5703 rtx xiop0 = XEXP (index, 0);
5704 rtx xiop1 = XEXP (index, 1);
5706 return ((arm_address_register_rtx_p (xiop0, strict_p)
5707 && thumb2_index_mul_operand (xiop1))
5708 || (arm_address_register_rtx_p (xiop1, strict_p)
5709 && thumb2_index_mul_operand (xiop0)));
5711 else if (code == ASHIFT)
5713 rtx op = XEXP (index, 1);
5715 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5716 && GET_CODE (op) == CONST_INT
5718 && INTVAL (op) <= 3);
5721 return (code == CONST_INT
5722 && INTVAL (index) < 4096
5723 && INTVAL (index) > -256);
5726 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5728 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5732 if (GET_CODE (x) != REG)
5738 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5740 return (regno <= LAST_LO_REGNUM
5741 || regno > LAST_VIRTUAL_REGISTER
5742 || regno == FRAME_POINTER_REGNUM
5743 || (GET_MODE_SIZE (mode) >= 4
5744 && (regno == STACK_POINTER_REGNUM
5745 || regno >= FIRST_PSEUDO_REGISTER
5746 || x == hard_frame_pointer_rtx
5747 || x == arg_pointer_rtx)));
5750 /* Return nonzero if x is a legitimate index register. This is the case
5751 for any base register that can access a QImode object. */
5753 thumb1_index_register_rtx_p (rtx x, int strict_p)
5755 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5758 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5760 The AP may be eliminated to either the SP or the FP, so we use the
5761 least common denominator, e.g. SImode, and offsets from 0 to 64.
5763 ??? Verify whether the above is the right approach.
5765 ??? Also, the FP may be eliminated to the SP, so perhaps that
5766 needs special handling also.
5768 ??? Look at how the mips16 port solves this problem. It probably uses
5769 better ways to solve some of these problems.
5771 Although it is not incorrect, we don't accept QImode and HImode
5772 addresses based on the frame pointer or arg pointer until the
5773 reload pass starts. This is so that eliminating such addresses
5774 into stack based ones won't produce impossible code. */
5776 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 /* ??? Not clear if this is right. Experiment. */
5779 if (GET_MODE_SIZE (mode) < 4
5780 && !(reload_in_progress || reload_completed)
5781 && (reg_mentioned_p (frame_pointer_rtx, x)
5782 || reg_mentioned_p (arg_pointer_rtx, x)
5783 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5784 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5785 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5786 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5789 /* Accept any base register. SP only in SImode or larger. */
5790 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5793 /* This is PC relative data before arm_reorg runs. */
5794 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5795 && GET_CODE (x) == SYMBOL_REF
5796 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5799 /* This is PC relative data after arm_reorg runs. */
5800 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5802 && (GET_CODE (x) == LABEL_REF
5803 || (GET_CODE (x) == CONST
5804 && GET_CODE (XEXP (x, 0)) == PLUS
5805 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5809 /* Post-inc indexing only supported for SImode and larger. */
5810 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5811 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5814 else if (GET_CODE (x) == PLUS)
5816 /* REG+REG address can be any two index registers. */
5817 /* We disallow FRAME+REG addressing since we know that FRAME
5818 will be replaced with STACK, and SP relative addressing only
5819 permits SP+OFFSET. */
5820 if (GET_MODE_SIZE (mode) <= 4
5821 && XEXP (x, 0) != frame_pointer_rtx
5822 && XEXP (x, 1) != frame_pointer_rtx
5823 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5824 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5825 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5828 /* REG+const has 5-7 bit offset for non-SP registers. */
5829 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5830 || XEXP (x, 0) == arg_pointer_rtx)
5831 && GET_CODE (XEXP (x, 1)) == CONST_INT
5832 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5835 /* REG+const has 10-bit offset for SP, but only SImode and
5836 larger is supported. */
5837 /* ??? Should probably check for DI/DFmode overflow here
5838 just like GO_IF_LEGITIMATE_OFFSET does. */
5839 else if (GET_CODE (XEXP (x, 0)) == REG
5840 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5841 && GET_MODE_SIZE (mode) >= 4
5842 && GET_CODE (XEXP (x, 1)) == CONST_INT
5843 && INTVAL (XEXP (x, 1)) >= 0
5844 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5845 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5848 else if (GET_CODE (XEXP (x, 0)) == REG
5849 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5850 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5851 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5852 && REGNO (XEXP (x, 0))
5853 <= LAST_VIRTUAL_POINTER_REGISTER))
5854 && GET_MODE_SIZE (mode) >= 4
5855 && GET_CODE (XEXP (x, 1)) == CONST_INT
5856 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5860 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5861 && GET_MODE_SIZE (mode) == 4
5862 && GET_CODE (x) == SYMBOL_REF
5863 && CONSTANT_POOL_ADDRESS_P (x)
5865 && symbol_mentioned_p (get_pool_constant (x))
5866 && ! pcrel_constant_p (get_pool_constant (x))))
5872 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5873 instruction of mode MODE. */
5875 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5877 switch (GET_MODE_SIZE (mode))
5880 return val >= 0 && val < 32;
5883 return val >= 0 && val < 64 && (val & 1) == 0;
5887 && (val + GET_MODE_SIZE (mode)) <= 128
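/* In instruction terms: ldrb/strb accept #0..31, ldrh/strh #0..62 in
   steps of 2, and ldr/str #0..124 in steps of 4 -- one 5-bit
   immediate field scaled by the access size. */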
5893 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5896 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5897 else if (TARGET_THUMB2)
5898 return thumb2_legitimate_address_p (mode, x, strict_p);
5899 else /* if (TARGET_THUMB1) */
5900 return thumb1_legitimate_address_p (mode, x, strict_p);
5903 /* Build the SYMBOL_REF for __tls_get_addr. */
5905 static GTY(()) rtx tls_get_addr_libfunc;
5908 get_tls_get_addr (void)
5910 if (!tls_get_addr_libfunc)
5911 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5912 return tls_get_addr_libfunc;
5916 arm_load_tp (rtx target)
5919 target = gen_reg_rtx (SImode);
5923 /* Can return in any reg. */
5924 emit_insn (gen_load_tp_hard (target));
5928 /* Always returned in r0. Immediately copy the result into a pseudo,
5929 otherwise other uses of r0 (e.g. setting up function arguments) may
5930 clobber the value. */
5934 emit_insn (gen_load_tp_soft ());
5936 tmp = gen_rtx_REG (SImode, 0);
5937 emit_move_insn (target, tmp);
5943 load_tls_operand (rtx x, rtx reg)
5947 if (reg == NULL_RTX)
5948 reg = gen_reg_rtx (SImode);
5950 tmp = gen_rtx_CONST (SImode, x);
5952 emit_move_insn (reg, tmp);
5958 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5960 rtx insns, label, labelno, sum;
5964 labelno = GEN_INT (pic_labelno++);
5965 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5966 label = gen_rtx_CONST (VOIDmode, label);
5968 sum = gen_rtx_UNSPEC (Pmode,
5969 gen_rtvec (4, x, GEN_INT (reloc), label,
5970 GEN_INT (TARGET_ARM ? 8 : 4)),
5972 reg = load_tls_operand (sum, reg);
5975 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5976 else if (TARGET_THUMB2)
5977 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5978 else /* TARGET_THUMB1 */
5979 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5981 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5982 Pmode, 1, reg, Pmode);
5984 insns = get_insns ();
5991 legitimize_tls_address (rtx x, rtx reg)
5993 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5994 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5998 case TLS_MODEL_GLOBAL_DYNAMIC:
5999 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6000 dest = gen_reg_rtx (Pmode);
6001 emit_libcall_block (insns, dest, ret, x);
6004 case TLS_MODEL_LOCAL_DYNAMIC:
6005 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6007 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6008 share the LDM result with other LD model accesses. */
6009 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6011 dest = gen_reg_rtx (Pmode);
6012 emit_libcall_block (insns, dest, ret, eqv);
6014 /* Load the addend. */
6015 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6017 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6018 return gen_rtx_PLUS (Pmode, dest, addend);
6020 case TLS_MODEL_INITIAL_EXEC:
6021 labelno = GEN_INT (pic_labelno++);
6022 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6023 label = gen_rtx_CONST (VOIDmode, label);
6024 sum = gen_rtx_UNSPEC (Pmode,
6025 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6026 GEN_INT (TARGET_ARM ? 8 : 4)),
6028 reg = load_tls_operand (sum, reg);
6031 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6032 else if (TARGET_THUMB2)
6033 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6036 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6037 emit_move_insn (reg, gen_const_mem (SImode, reg));
6040 tp = arm_load_tp (NULL_RTX);
6042 return gen_rtx_PLUS (Pmode, tp, reg);
6044 case TLS_MODEL_LOCAL_EXEC:
6045 tp = arm_load_tp (NULL_RTX);
6047 reg = gen_rtx_UNSPEC (Pmode,
6048 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6050 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6052 return gen_rtx_PLUS (Pmode, tp, reg);
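/* e.g. for "__thread int t;" under the local-exec model this yields
   tp + UNSPEC (t, TLS_LE32), where the UNSPEC becomes an
   R_ARM_TLS_LE32 relocation and arm_load_tp fetches tp either from
   CP15 ("mrc p15, 0, rX, c13, c0, 3") or via __aeabi_read_tp. */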
6059 /* Try machine-dependent ways of modifying an illegitimate address
6060 to be legitimate. If we find one, return the new, valid address. */
6062 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6066 /* TODO: legitimize_address for Thumb2. */
6069 return thumb_legitimize_address (x, orig_x, mode);
6072 if (arm_tls_symbol_p (x))
6073 return legitimize_tls_address (x, NULL_RTX);
6075 if (GET_CODE (x) == PLUS)
6077 rtx xop0 = XEXP (x, 0);
6078 rtx xop1 = XEXP (x, 1);
6080 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6081 xop0 = force_reg (SImode, xop0);
6083 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6084 xop1 = force_reg (SImode, xop1);
6086 if (ARM_BASE_REGISTER_RTX_P (xop0)
6087 && GET_CODE (xop1) == CONST_INT)
6089 HOST_WIDE_INT n, low_n;
6093 /* VFP addressing modes actually allow greater offsets, but for
6094 now we just stick with the lowest common denominator. */
6096 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6108 low_n = ((mode) == TImode ? 0
6109 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6113 base_reg = gen_reg_rtx (SImode);
6114 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6115 emit_move_insn (base_reg, val);
6116 x = plus_constant (base_reg, low_n);
6118 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6119 x = gen_rtx_PLUS (SImode, xop0, xop1);
6122 /* XXX We don't allow MINUS any more -- see comment in
6123 arm_legitimate_address_outer_p (). */
6124 else if (GET_CODE (x) == MINUS)
6126 rtx xop0 = XEXP (x, 0);
6127 rtx xop1 = XEXP (x, 1);
6129 if (CONSTANT_P (xop0))
6130 xop0 = force_reg (SImode, xop0);
6132 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6133 xop1 = force_reg (SImode, xop1);
6135 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6136 x = gen_rtx_MINUS (SImode, xop0, xop1);
6139 /* Make sure to take full advantage of the pre-indexed addressing mode
6140 with absolute addresses which often allows for the base register to
6141 be factorized for multiple adjacent memory references, and it might
6142 even allow for the minipool to be avoided entirely. */
6143 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6146 HOST_WIDE_INT mask, base, index;
6149 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6150 use an 8-bit index. So let's use a 12-bit index for SImode only and
6151 hope that arm_gen_constant will enable ldrb to use more bits. */
6152 bits = (mode == SImode) ? 12 : 8;
6153 mask = (1 << bits) - 1;
6154 base = INTVAL (x) & ~mask;
6155 index = INTVAL (x) & mask;
6156 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6158 /* It'll most probably be more efficient to generate the base
6159 with more bits set and use a negative index instead. */
6163 base_reg = force_reg (SImode, GEN_INT (base));
6164 x = plus_constant (base_reg, index);
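/* Illustrative numbers: a load from constant address 0x10004 in
   SImode gives mask == 0xfff, base == 0x10000, index == 4; only the
   base is forced into a register, so the access becomes
   "ldr rD, [rBASE, #4]" and a neighbour at 0x10008 can share rBASE. */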
6169 /* We need to find and carefully transform any SYMBOL and LABEL
6170 references; so go back to the original address expression. */
6171 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6173 if (new_x != orig_x)
6181 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6182 to be legitimate. If we find one, return the new, valid address. */
6184 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6186 if (arm_tls_symbol_p (x))
6187 return legitimize_tls_address (x, NULL_RTX);
6189 if (GET_CODE (x) == PLUS
6190 && GET_CODE (XEXP (x, 1)) == CONST_INT
6191 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6192 || INTVAL (XEXP (x, 1)) < 0))
6194 rtx xop0 = XEXP (x, 0);
6195 rtx xop1 = XEXP (x, 1);
6196 HOST_WIDE_INT offset = INTVAL (xop1);
6198 /* Try and fold the offset into a biasing of the base register and
6199 then offsetting that. Don't do this when optimizing for space
6200 since it can cause too many CSEs. */
6201 if (optimize_size && offset >= 0
6202 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6204 HOST_WIDE_INT delta;
6207 delta = offset - (256 - GET_MODE_SIZE (mode));
6208 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6209 delta = 31 * GET_MODE_SIZE (mode);
6211 delta = offset & (~31 * GET_MODE_SIZE (mode));
6213 xop0 = force_operand (plus_constant (xop0, offset - delta),
6215 x = plus_constant (xop0, delta);
6217 else if (offset < 0 && offset > -256)
6218 /* Small negative offsets are best done with a subtract before the
6219 dereference; forcing these into a register normally takes two
6220 instructions. */
6221 x = force_operand (x, NULL_RTX);
6224 /* For the remaining cases, force the constant into a register. */
6225 xop1 = force_reg (SImode, xop1);
6226 x = gen_rtx_PLUS (SImode, xop0, xop1);
6229 else if (GET_CODE (x) == PLUS
6230 && s_register_operand (XEXP (x, 1), SImode)
6231 && !s_register_operand (XEXP (x, 0), SImode))
6233 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6235 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
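/* Worked numbers for the -Os folding above, assuming SImode and an
   original offset of 300 taking the first delta branch:
   delta = 300 - 252 = 48, so the base is biased by a constant #252
   and the access uses [rT, #48]. Every offset handled by that branch
   biases by the same #252, which is what lets CSE share the add. */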
6240 /* We need to find and carefully transform any SYMBOL and LABEL
6241 references; so go back to the original address expression. */
6242 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6244 if (new_x != orig_x)
6252 arm_legitimize_reload_address (rtx *p,
6253 enum machine_mode mode,
6254 int opnum, int type,
6255 int ind_levels ATTRIBUTE_UNUSED)
6257 if (GET_CODE (*p) == PLUS
6258 && GET_CODE (XEXP (*p, 0)) == REG
6259 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6260 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6262 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6263 HOST_WIDE_INT low, high;
6265 /* Detect coprocessor load/stores. */
6266 bool coproc_p = ((TARGET_HARD_FLOAT
6267 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6268 && (mode == SFmode || mode == DFmode
6269 || (mode == DImode && TARGET_MAVERICK)))
6270 || (TARGET_REALLY_IWMMXT
6271 && VALID_IWMMXT_REG_MODE (mode))
6273 && (VALID_NEON_DREG_MODE (mode)
6274 || VALID_NEON_QREG_MODE (mode))));
6276 /* For some conditions, bail out when lower two bits are unaligned. */
6277 if ((val & 0x3) != 0
6278 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6280 /* For DI, and DF under soft-float: */
6281 || ((mode == DImode || mode == DFmode)
6282 /* Without ldrd, we use stm/ldm, which does not
fare well with unaligned bits. */
6285 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6286 || TARGET_THUMB2))))
6289 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6290 of which the (reg+high) gets turned into a reload add insn,
6291 we try to decompose the index into high/low values that can often
6292 also lead to better reload CSE.
6294 ldr r0, [r2, #4100] // Offset too large
6295 ldr r1, [r2, #4104] // Offset too large
is best reloaded as:

add t1, r2, #4096
ldr r0, [t1, #4]
add t2, r2, #4096
ldr r1, [t2, #8]
6303 which post-reload CSE can simplify in most cases to eliminate the
second add instruction:

add t1, r2, #4096
ldr r0, [t1, #4]
ldr r1, [t1, #8]
6309 The idea here is that we want to split out the bits of the constant
6310 as a mask, rather than as subtracting the maximum offset that the
6311 respective type of load/store used can handle.
When encountering negative offsets, we can still utilize a negative low
part even if the overall offset is positive; sometimes this leads to an
immediate that can be constructed with fewer instructions. For example:
6317 ldr r0, [r2, #0x3FFFFC]
6319 This is best reloaded as:
add t1, r2, #0x400000
ldr r0, [t1, #-4]
6323 The trick for spotting this for a load insn with N bits of offset
(i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6325 negative offset that is going to make bit N and all the bits below
6326 it become zero in the remainder part.
6328 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6329 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6330 used in most cases of ARM load/store instructions. */
6332 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6333 (((VAL) & ((1 << (N)) - 1)) \
? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
: 0)
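/* For example, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) evaluates to
((0x1FFC ^ 0x1000) - 0x1000) == -4, leaving a remainder of
0x3FFFFC - (-4) == 0x400000, which has bits 12:0 clear. */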
6339 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6341 /* NEON quad-word load/stores are made of two double-word accesses,
so the valid index range is reduced by 8. Treat as 9-bit range if
we go over it. */
6344 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6345 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6347 else if (GET_MODE_SIZE (mode) == 8)
6350 low = (TARGET_THUMB2
6351 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6352 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6354 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6355 to access doublewords. The supported load/store offsets are
6356 -8, -4, and 4, which we try to produce here. */
6357 low = ((val & 0xf) ^ 0x8) - 0x8;
6359 else if (GET_MODE_SIZE (mode) < 8)
6361 /* NEON element load/stores do not have an offset. */
6362 if (TARGET_NEON_FP16 && mode == HFmode)
6367 /* Thumb-2 has an asymmetrical index range of (-256,4096).
Try the wider 12-bit range first, and re-try if the result
is out of range. */
6370 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
if (low < -255)
low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
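/* A worked example: val == 0x11FF gives low == -3585 from the 12-bit
attempt, which is below the -255 limit, so the 8-bit retry is used
and yields low == -1 with a remainder of 0x1200. */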
6376 if (mode == HImode || mode == HFmode)
6379 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6382 /* The storehi/movhi_bytes fallbacks can use only
6383 [-4094,+4094] of the full ldrb/strb index range. */
6384 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6385 if (low == 4095 || low == -4095)
6390 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6396 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6397 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6398 - (unsigned HOST_WIDE_INT) 0x80000000);
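/* The XOR/subtract pair above sign-extends the low 32 bits of
(val - low) to the full HOST_WIDE_INT width; e.g.
(0xFFFFFFFC ^ 0x80000000) - 0x80000000 == -4. */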
6399 /* Check for overflow or zero */
6400 if (low == 0 || high == 0 || (high + low != val))
/* Reload the high part into a base reg; leave the low part
in the mem. */
6405 *p = gen_rtx_PLUS (GET_MODE (*p),
6406 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6409 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6410 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6411 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6419 thumb_legitimize_reload_address (rtx *x_p,
6420 enum machine_mode mode,
6421 int opnum, int type,
6422 int ind_levels ATTRIBUTE_UNUSED)
6426 if (GET_CODE (x) == PLUS
6427 && GET_MODE_SIZE (mode) < 4
6428 && REG_P (XEXP (x, 0))
6429 && XEXP (x, 0) == stack_pointer_rtx
6430 && GET_CODE (XEXP (x, 1)) == CONST_INT
6431 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6436 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6437 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6441 /* If both registers are hi-regs, then it's better to reload the
6442 entire expression rather than each register individually. That
6443 only requires one reload register rather than two. */
6444 if (GET_CODE (x) == PLUS
6445 && REG_P (XEXP (x, 0))
6446 && REG_P (XEXP (x, 1))
6447 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6448 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6453 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6454 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6461 /* Test for various thread-local symbols. */
6463 /* Return TRUE if X is a thread-local symbol. */
6466 arm_tls_symbol_p (rtx x)
6468 if (! TARGET_HAVE_TLS)
6471 if (GET_CODE (x) != SYMBOL_REF)
6474 return SYMBOL_REF_TLS_MODEL (x) != 0;
6477 /* Helper for arm_tls_referenced_p. */
6480 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6482 if (GET_CODE (*x) == SYMBOL_REF)
6483 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6485 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6486 TLS offsets, not real symbol references. */
6487 if (GET_CODE (*x) == UNSPEC
6488 && XINT (*x, 1) == UNSPEC_TLS)
6494 /* Return TRUE if X contains any TLS symbol references. */
6497 arm_tls_referenced_p (rtx x)
6499 if (! TARGET_HAVE_TLS)
6502 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6505 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6507 On the ARM, allow any integer (invalid ones are removed later by insn
patterns), nice doubles and symbol_refs which refer to the function's
constant pool.

When generating PIC, allow anything. */
6514 arm_legitimate_constant_p_1 (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6516 return flag_pic || !label_mentioned_p (x);
6520 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6522 return (GET_CODE (x) == CONST_INT
6523 || GET_CODE (x) == CONST_DOUBLE
6524 || CONSTANT_ADDRESS_P (x)
6529 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6531 return (!arm_cannot_force_const_mem (mode, x)
6533 ? arm_legitimate_constant_p_1 (mode, x)
6534 : thumb_legitimate_constant_p (mode, x)));
6537 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6540 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6544 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6546 split_const (x, &base, &offset);
6547 if (GET_CODE (base) == SYMBOL_REF
6548 && !offset_within_block_p (base, INTVAL (offset)))
6551 return arm_tls_referenced_p (x);
6554 #define REG_OR_SUBREG_REG(X) \
6555 (GET_CODE (X) == REG \
6556 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6558 #define REG_OR_SUBREG_RTX(X) \
6559 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6562 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6564 enum machine_mode mode = GET_MODE (x);
6578 return COSTS_N_INSNS (1);
6581 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6584 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6591 return COSTS_N_INSNS (2) + cycles;
6593 return COSTS_N_INSNS (1) + 16;
return (COSTS_N_INSNS (1)
+ 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ (GET_CODE (SET_DEST (x)) == MEM)));
6603 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6605 if (thumb_shiftable_const (INTVAL (x)))
6606 return COSTS_N_INSNS (2);
6607 return COSTS_N_INSNS (3);
6609 else if ((outer == PLUS || outer == COMPARE)
6610 && INTVAL (x) < 256 && INTVAL (x) > -256)
6612 else if ((outer == IOR || outer == XOR || outer == AND)
6613 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6614 return COSTS_N_INSNS (1);
6615 else if (outer == AND)
6618 /* This duplicates the tests in the andsi3 expander. */
6619 for (i = 9; i <= 31; i++)
6620 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6621 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6622 return COSTS_N_INSNS (2);
6624 else if (outer == ASHIFT || outer == ASHIFTRT
6625 || outer == LSHIFTRT)
6627 return COSTS_N_INSNS (2);
6633 return COSTS_N_INSNS (3);
6651 /* XXX another guess. */
6652 /* Memory costs quite a lot for the first word, but subsequent words
6653 load at the equivalent of a single insn each. */
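/* E.g. with UNITS_PER_WORD == 4 this yields 10 for an SImode load and
14 for a DImode load, plus 4 more when the address is a symbol in the
constant pool. */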
6654 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6655 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6660 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6666 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6667 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6673 return total + COSTS_N_INSNS (1);
6675 /* Assume a two-shift sequence. Increase the cost slightly so
6676 we prefer actual shifts over an extend operation. */
6677 return total + 1 + COSTS_N_INSNS (2);
6685 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6687 enum machine_mode mode = GET_MODE (x);
6688 enum rtx_code subcode;
6690 enum rtx_code code = GET_CODE (x);
6696 /* Memory costs quite a lot for the first word, but subsequent words
6697 load at the equivalent of a single insn each. */
6698 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6705 if (TARGET_HARD_FLOAT && mode == SFmode)
6706 *total = COSTS_N_INSNS (2);
6707 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6708 *total = COSTS_N_INSNS (4);
6710 *total = COSTS_N_INSNS (20);
6714 if (GET_CODE (XEXP (x, 1)) == REG)
6715 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6716 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6717 *total = rtx_cost (XEXP (x, 1), code, speed);
6723 *total += COSTS_N_INSNS (4);
6728 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6729 *total += rtx_cost (XEXP (x, 0), code, speed);
6732 *total += COSTS_N_INSNS (3);
6736 *total += COSTS_N_INSNS (1);
/* Increase the cost of complex shifts because they aren't any faster,
and they reduce dual-issue opportunities. */
6739 if (arm_tune_cortex_a9
6740 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6748 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6749 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6750 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6752 *total += rtx_cost (XEXP (x, 1), code, speed);
6756 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6757 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6759 *total += rtx_cost (XEXP (x, 0), code, speed);
6766 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6768 if (TARGET_HARD_FLOAT
6770 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6772 *total = COSTS_N_INSNS (1);
6773 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6774 && arm_const_double_rtx (XEXP (x, 0)))
6776 *total += rtx_cost (XEXP (x, 1), code, speed);
6780 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6781 && arm_const_double_rtx (XEXP (x, 1)))
6783 *total += rtx_cost (XEXP (x, 0), code, speed);
6789 *total = COSTS_N_INSNS (20);
6793 *total = COSTS_N_INSNS (1);
6794 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6795 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6797 *total += rtx_cost (XEXP (x, 1), code, speed);
6801 subcode = GET_CODE (XEXP (x, 1));
6802 if (subcode == ASHIFT || subcode == ASHIFTRT
6803 || subcode == LSHIFTRT
6804 || subcode == ROTATE || subcode == ROTATERT)
6806 *total += rtx_cost (XEXP (x, 0), code, speed);
6807 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6811 /* A shift as a part of RSB costs no more than RSB itself. */
6812 if (GET_CODE (XEXP (x, 0)) == MULT
6813 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6815 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6816 *total += rtx_cost (XEXP (x, 1), code, speed);
6821 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6823 *total += rtx_cost (XEXP (x, 0), code, speed);
6824 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6828 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6829 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6831 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6832 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6833 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6834 *total += COSTS_N_INSNS (1);
6842 if (code == PLUS && arm_arch6 && mode == SImode
6843 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6844 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6846 *total = COSTS_N_INSNS (1);
6847 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6849 *total += rtx_cost (XEXP (x, 1), code, speed);
6853 /* MLA: All arguments must be registers. We filter out
multiplication by a power of two, so that we fall down into
the code below. */
6856 if (GET_CODE (XEXP (x, 0)) == MULT
6857 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6859 /* The cost comes from the cost of the multiply. */
6863 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6865 if (TARGET_HARD_FLOAT
6867 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6869 *total = COSTS_N_INSNS (1);
6870 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6871 && arm_const_double_rtx (XEXP (x, 1)))
6873 *total += rtx_cost (XEXP (x, 0), code, speed);
6880 *total = COSTS_N_INSNS (20);
6884 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6885 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6887 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6888 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6889 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6890 *total += COSTS_N_INSNS (1);
6896 case AND: case XOR: case IOR:
6898 /* Normally the frame registers will be spilt into reg+const during
6899 reload, so it is a bad idea to combine them with other instructions,
6900 since then they might not be moved outside of loops. As a compromise
we allow integration with ops that have a constant as their second
operand. */
6903 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6904 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6905 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6906 *total = COSTS_N_INSNS (1);
6910 *total += COSTS_N_INSNS (2);
6911 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6912 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6914 *total += rtx_cost (XEXP (x, 0), code, speed);
6921 *total += COSTS_N_INSNS (1);
6922 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6923 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6925 *total += rtx_cost (XEXP (x, 0), code, speed);
6928 subcode = GET_CODE (XEXP (x, 0));
6929 if (subcode == ASHIFT || subcode == ASHIFTRT
6930 || subcode == LSHIFTRT
6931 || subcode == ROTATE || subcode == ROTATERT)
6933 *total += rtx_cost (XEXP (x, 1), code, speed);
6934 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6939 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6941 *total += rtx_cost (XEXP (x, 1), code, speed);
6942 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6946 if (subcode == UMIN || subcode == UMAX
6947 || subcode == SMIN || subcode == SMAX)
6949 *total = COSTS_N_INSNS (3);
6956 /* This should have been handled by the CPU specific routines. */
6960 if (arm_arch3m && mode == SImode
6961 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6963 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6964 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6965 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6966 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6968 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6971 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6975 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6977 if (TARGET_HARD_FLOAT
6979 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6981 *total = COSTS_N_INSNS (1);
6984 *total = COSTS_N_INSNS (2);
6990 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6991 if (mode == SImode && code == NOT)
6993 subcode = GET_CODE (XEXP (x, 0));
6994 if (subcode == ASHIFT || subcode == ASHIFTRT
6995 || subcode == LSHIFTRT
6996 || subcode == ROTATE || subcode == ROTATERT
6998 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7000 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7001 /* Register shifts cost an extra cycle. */
7002 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7003 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7012 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7014 *total = COSTS_N_INSNS (4);
7018 operand = XEXP (x, 0);
7020 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7021 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7022 && GET_CODE (XEXP (operand, 0)) == REG
7023 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7024 *total += COSTS_N_INSNS (1);
7025 *total += (rtx_cost (XEXP (x, 1), code, speed)
7026 + rtx_cost (XEXP (x, 2), code, speed));
7030 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7032 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7038 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7039 && mode == SImode && XEXP (x, 1) == const0_rtx)
7041 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7047 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7048 && mode == SImode && XEXP (x, 1) == const0_rtx)
7050 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
/* SCC insns. In the case where the comparison has already been
performed, they cost 2 instructions. Otherwise they need an
additional comparison before them. */
7073 *total = COSTS_N_INSNS (2);
7074 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7081 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7087 *total += COSTS_N_INSNS (1);
7088 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7089 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7091 *total += rtx_cost (XEXP (x, 0), code, speed);
7095 subcode = GET_CODE (XEXP (x, 0));
7096 if (subcode == ASHIFT || subcode == ASHIFTRT
7097 || subcode == LSHIFTRT
7098 || subcode == ROTATE || subcode == ROTATERT)
7100 *total += rtx_cost (XEXP (x, 1), code, speed);
7101 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7106 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7108 *total += rtx_cost (XEXP (x, 1), code, speed);
7109 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7119 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7120 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7121 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7122 *total += rtx_cost (XEXP (x, 1), code, speed);
7126 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7128 if (TARGET_HARD_FLOAT
7130 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7132 *total = COSTS_N_INSNS (1);
7135 *total = COSTS_N_INSNS (20);
7138 *total = COSTS_N_INSNS (1);
7140 *total += COSTS_N_INSNS (3);
7146 if (GET_MODE_CLASS (mode) == MODE_INT)
7148 rtx op = XEXP (x, 0);
7149 enum machine_mode opmode = GET_MODE (op);
7152 *total += COSTS_N_INSNS (1);
7154 if (opmode != SImode)
7158 /* If !arm_arch4, we use one of the extendhisi2_mem
7159 or movhi_bytes patterns for HImode. For a QImode
7160 sign extension, we first zero-extend from memory
7161 and then perform a shift sequence. */
7162 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7163 *total += COSTS_N_INSNS (2);
7166 *total += COSTS_N_INSNS (1);
/* We don't have the necessary insn, so we need to perform some
other operation. */
7170 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7171 /* An and with constant 255. */
7172 *total += COSTS_N_INSNS (1);
7174 /* A shift sequence. Increase costs slightly to avoid
7175 combining two shifts into an extend operation. */
7176 *total += COSTS_N_INSNS (2) + 1;
7182 switch (GET_MODE (XEXP (x, 0)))
7189 *total = COSTS_N_INSNS (1);
7199 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7203 if (const_ok_for_arm (INTVAL (x))
7204 || const_ok_for_arm (~INTVAL (x)))
7205 *total = COSTS_N_INSNS (1);
7207 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7208 INTVAL (x), NULL_RTX,
7215 *total = COSTS_N_INSNS (3);
7219 *total = COSTS_N_INSNS (1);
7223 *total = COSTS_N_INSNS (1);
7224 *total += rtx_cost (XEXP (x, 0), code, speed);
7228 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7229 && (mode == SFmode || !TARGET_VFP_SINGLE))
7230 *total = COSTS_N_INSNS (1);
7232 *total = COSTS_N_INSNS (4);
7236 *total = COSTS_N_INSNS (4);
7241 /* Estimates the size cost of thumb1 instructions.
7242 For now most of the code is copied from thumb1_rtx_costs. We need more
fine-grained tuning when we have more related test cases. */
7245 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7247 enum machine_mode mode = GET_MODE (x);
7260 return COSTS_N_INSNS (1);
7263 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
/* The Thumb-1 mul instruction can't operate on a constant; we must
load it into a register first. */
7267 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7268 return COSTS_N_INSNS (1) + const_size;
7270 return COSTS_N_INSNS (1);
return (COSTS_N_INSNS (1)
+ 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ (GET_CODE (SET_DEST (x)) == MEM)));
7280 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7281 return COSTS_N_INSNS (1);
7282 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7283 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7284 return COSTS_N_INSNS (2);
7285 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7286 if (thumb_shiftable_const (INTVAL (x)))
7287 return COSTS_N_INSNS (2);
7288 return COSTS_N_INSNS (3);
7290 else if ((outer == PLUS || outer == COMPARE)
7291 && INTVAL (x) < 256 && INTVAL (x) > -256)
7293 else if ((outer == IOR || outer == XOR || outer == AND)
7294 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7295 return COSTS_N_INSNS (1);
7296 else if (outer == AND)
7299 /* This duplicates the tests in the andsi3 expander. */
7300 for (i = 9; i <= 31; i++)
7301 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7302 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7303 return COSTS_N_INSNS (2);
7305 else if (outer == ASHIFT || outer == ASHIFTRT
7306 || outer == LSHIFTRT)
7308 return COSTS_N_INSNS (2);
7314 return COSTS_N_INSNS (3);
7332 /* XXX another guess. */
7333 /* Memory costs quite a lot for the first word, but subsequent words
7334 load at the equivalent of a single insn each. */
7335 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7336 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7341 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7346 /* XXX still guessing. */
7347 switch (GET_MODE (XEXP (x, 0)))
7350 return (1 + (mode == DImode ? 4 : 0)
7351 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7354 return (4 + (mode == DImode ? 4 : 0)
7355 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7358 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7369 /* RTX costs when optimizing for size. */
7371 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7374 enum machine_mode mode = GET_MODE (x);
7377 *total = thumb1_size_rtx_costs (x, code, outer_code);
7381 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7385 /* A memory access costs 1 insn if the mode is small, or the address is
7386 a single register, otherwise it costs one insn per word. */
7387 if (REG_P (XEXP (x, 0)))
7388 *total = COSTS_N_INSNS (1);
7390 && GET_CODE (XEXP (x, 0)) == PLUS
7391 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7392 /* This will be split into two instructions.
7393 See arm.md:calculate_pic_address. */
7394 *total = COSTS_N_INSNS (2);
7396 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7403 /* Needs a libcall, so it costs about this. */
7404 *total = COSTS_N_INSNS (2);
7408 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7410 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7418 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7420 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7423 else if (mode == SImode)
7425 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7426 /* Slightly disparage register shifts, but not by much. */
7427 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7428 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7432 /* Needs a libcall. */
7433 *total = COSTS_N_INSNS (2);
7437 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7438 && (mode == SFmode || !TARGET_VFP_SINGLE))
7440 *total = COSTS_N_INSNS (1);
7446 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7447 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7449 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7450 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7451 || subcode1 == ROTATE || subcode1 == ROTATERT
7452 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7453 || subcode1 == ASHIFTRT)
7455 /* It's just the cost of the two operands. */
7460 *total = COSTS_N_INSNS (1);
7464 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7468 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7469 && (mode == SFmode || !TARGET_VFP_SINGLE))
7471 *total = COSTS_N_INSNS (1);
7475 /* A shift as a part of ADD costs nothing. */
7476 if (GET_CODE (XEXP (x, 0)) == MULT
7477 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7479 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7480 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7481 *total += rtx_cost (XEXP (x, 1), code, false);
7486 case AND: case XOR: case IOR:
7489 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7491 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7492 || subcode == LSHIFTRT || subcode == ASHIFTRT
7493 || (code == AND && subcode == NOT))
7495 /* It's just the cost of the two operands. */
7501 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7505 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7509 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7510 && (mode == SFmode || !TARGET_VFP_SINGLE))
7512 *total = COSTS_N_INSNS (1);
7518 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7527 if (cc_register (XEXP (x, 0), VOIDmode))
7530 *total = COSTS_N_INSNS (1);
7534 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7535 && (mode == SFmode || !TARGET_VFP_SINGLE))
7536 *total = COSTS_N_INSNS (1);
7538 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7543 return arm_rtx_costs_1 (x, outer_code, total, 0);
7546 if (const_ok_for_arm (INTVAL (x)))
7547 /* A multiplication by a constant requires another instruction
7548 to load the constant to a register. */
7549 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7551 else if (const_ok_for_arm (~INTVAL (x)))
7552 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7553 else if (const_ok_for_arm (-INTVAL (x)))
7555 if (outer_code == COMPARE || outer_code == PLUS
7556 || outer_code == MINUS)
7559 *total = COSTS_N_INSNS (1);
7562 *total = COSTS_N_INSNS (2);
7568 *total = COSTS_N_INSNS (2);
7572 *total = COSTS_N_INSNS (4);
7577 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7578 cost of these slightly. */
7579 *total = COSTS_N_INSNS (1) + 1;
7583 if (mode != VOIDmode)
7584 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
*total = COSTS_N_INSNS (4); /* Who knows? */
/* Dispatch RTX cost computation: use the size costs when optimizing
for size, otherwise the tuning-specific speed costs. */
7593 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7597 return arm_size_rtx_costs (x, (enum rtx_code) code,
7598 (enum rtx_code) outer_code, total);
7600 return current_tune->rtx_costs (x, (enum rtx_code) code,
7601 (enum rtx_code) outer_code,
7605 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7606 supported on any "slowmul" cores, so it can be ignored. */
7609 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7610 int *total, bool speed)
7612 enum machine_mode mode = GET_MODE (x);
7616 *total = thumb1_rtx_costs (x, code, outer_code);
7623 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7626 *total = COSTS_N_INSNS (20);
7630 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7632 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7633 & (unsigned HOST_WIDE_INT) 0xffffffff);
7634 int cost, const_ok = const_ok_for_arm (i);
7635 int j, booth_unit_size;
7637 /* Tune as appropriate. */
7638 cost = const_ok ? 4 : 8;
7639 booth_unit_size = 2;
7640 for (j = 0; i && j < 32; j += booth_unit_size)
7642 i >>= booth_unit_size;
7646 *total = COSTS_N_INSNS (cost);
7647 *total += rtx_cost (XEXP (x, 0), code, speed);
7651 *total = COSTS_N_INSNS (20);
return arm_rtx_costs_1 (x, outer_code, total, speed);
7660 /* RTX cost for cores with a fast multiply unit (M variants). */
7663 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7664 int *total, bool speed)
7666 enum machine_mode mode = GET_MODE (x);
7670 *total = thumb1_rtx_costs (x, code, outer_code);
7674 /* ??? should thumb2 use different costs? */
7678 /* There is no point basing this on the tuning, since it is always the
7679 fast variant if it exists at all. */
7681 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7682 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7683 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7685 *total = COSTS_N_INSNS(2);
7692 *total = COSTS_N_INSNS (5);
7696 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7698 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7699 & (unsigned HOST_WIDE_INT) 0xffffffff);
7700 int cost, const_ok = const_ok_for_arm (i);
7701 int j, booth_unit_size;
7703 /* Tune as appropriate. */
7704 cost = const_ok ? 4 : 8;
7705 booth_unit_size = 8;
7706 for (j = 0; i && j < 32; j += booth_unit_size)
7708 i >>= booth_unit_size;
7712 *total = COSTS_N_INSNS(cost);
7718 *total = COSTS_N_INSNS (4);
7722 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7724 if (TARGET_HARD_FLOAT
7726 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7728 *total = COSTS_N_INSNS (1);
7733 /* Requires a lib call */
7734 *total = COSTS_N_INSNS (20);
7738 return arm_rtx_costs_1 (x, outer_code, total, speed);
7743 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7744 so it can be ignored. */
7747 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7748 int *total, bool speed)
7750 enum machine_mode mode = GET_MODE (x);
7754 *total = thumb1_rtx_costs (x, code, outer_code);
7761 if (GET_CODE (XEXP (x, 0)) != MULT)
7762 return arm_rtx_costs_1 (x, outer_code, total, speed);
7764 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7765 will stall until the multiplication is complete. */
7766 *total = COSTS_N_INSNS (3);
7770 /* There is no point basing this on the tuning, since it is always the
7771 fast variant if it exists at all. */
7773 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7774 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7775 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7777 *total = COSTS_N_INSNS (2);
7784 *total = COSTS_N_INSNS (5);
7788 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7790 /* If operand 1 is a constant we can more accurately
7791 calculate the cost of the multiply. The multiplier can
7792 retire 15 bits on the first cycle and a further 12 on the
7793 second. We do, of course, have to load the constant into
7794 a register first. */
7795 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7796 /* There's a general overhead of one cycle. */
7798 unsigned HOST_WIDE_INT masked_const;
7803 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7805 masked_const = i & 0xffff8000;
7806 if (masked_const != 0)
7809 masked_const = i & 0xf8000000;
7810 if (masked_const != 0)
7813 *total = COSTS_N_INSNS (cost);
7819 *total = COSTS_N_INSNS (3);
7823 /* Requires a lib call */
7824 *total = COSTS_N_INSNS (20);
7828 return arm_rtx_costs_1 (x, outer_code, total, speed);
7833 /* RTX costs for 9e (and later) cores. */
7836 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7837 int *total, bool speed)
7839 enum machine_mode mode = GET_MODE (x);
7846 *total = COSTS_N_INSNS (3);
7850 *total = thumb1_rtx_costs (x, code, outer_code);
7858 /* There is no point basing this on the tuning, since it is always the
7859 fast variant if it exists at all. */
7861 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7862 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7863 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7865 *total = COSTS_N_INSNS (2);
7872 *total = COSTS_N_INSNS (5);
7878 *total = COSTS_N_INSNS (2);
7882 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7884 if (TARGET_HARD_FLOAT
7886 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7888 *total = COSTS_N_INSNS (1);
7893 *total = COSTS_N_INSNS (20);
7897 return arm_rtx_costs_1 (x, outer_code, total, speed);
7900 /* All address computations that can be done are free, but rtx cost returns
7901 the same for practically all of them. So we weight the different types
7902 of address here in the order (most pref first):
7903 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7905 arm_arm_address_cost (rtx x)
7907 enum rtx_code c = GET_CODE (x);
7909 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7911 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7916 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7919 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7929 arm_thumb_address_cost (rtx x)
7931 enum rtx_code c = GET_CODE (x);
7936 && GET_CODE (XEXP (x, 0)) == REG
7937 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7944 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7946 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7949 /* Adjust cost hook for XScale. */
7951 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7953 /* Some true dependencies can have a higher cost depending
7954 on precisely how certain input operands are used. */
7955 if (REG_NOTE_KIND(link) == 0
7956 && recog_memoized (insn) >= 0
7957 && recog_memoized (dep) >= 0)
7959 int shift_opnum = get_attr_shift (insn);
7960 enum attr_type attr_type = get_attr_type (dep);
7962 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7963 operand for INSN. If we have a shifted input operand and the
7964 instruction we depend on is another ALU instruction, then we may
7965 have to account for an additional stall. */
7966 if (shift_opnum != 0
7967 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7969 rtx shifted_operand;
7972 /* Get the shifted operand. */
7973 extract_insn (insn);
7974 shifted_operand = recog_data.operand[shift_opnum];
7976 /* Iterate over all the operands in DEP. If we write an operand
that overlaps with SHIFTED_OPERAND, then we must increase the
7978 cost of this dependency. */
7980 preprocess_constraints ();
7981 for (opno = 0; opno < recog_data.n_operands; opno++)
7983 /* We can ignore strict inputs. */
7984 if (recog_data.operand_type[opno] == OP_IN)
7987 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7999 /* Adjust cost hook for Cortex A9. */
8001 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8003 switch (REG_NOTE_KIND (link))
8010 case REG_DEP_OUTPUT:
8011 if (recog_memoized (insn) >= 0
8012 && recog_memoized (dep) >= 0)
8014 if (GET_CODE (PATTERN (insn)) == SET)
8017 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8019 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8021 enum attr_type attr_type_insn = get_attr_type (insn);
8022 enum attr_type attr_type_dep = get_attr_type (dep);
8024 /* By default all dependencies of the form
8027 have an extra latency of 1 cycle because
8028 of the input and output dependency in this
case. However this gets modeled as a true
8030 dependency and hence all these checks. */
8031 if (REG_P (SET_DEST (PATTERN (insn)))
8032 && REG_P (SET_DEST (PATTERN (dep)))
8033 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8034 SET_DEST (PATTERN (dep))))
/* FMACS is a special case where the dependent
instruction can be issued 3 cycles before
the normal latency in case of an output
dependency. */
8040 if ((attr_type_insn == TYPE_FMACS
8041 || attr_type_insn == TYPE_FMACD)
8042 && (attr_type_dep == TYPE_FMACS
8043 || attr_type_dep == TYPE_FMACD))
8045 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8046 *cost = insn_default_latency (dep) - 3;
8048 *cost = insn_default_latency (dep);
8053 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8054 *cost = insn_default_latency (dep) + 1;
8056 *cost = insn_default_latency (dep);
8072 /* Adjust cost hook for FA726TE. */
8074 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
/* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
followed by a predicated one) has a penalty of 3. */
8078 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8079 && recog_memoized (insn) >= 0
8080 && recog_memoized (dep) >= 0
8081 && get_attr_conds (dep) == CONDS_SET)
8083 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8084 if (get_attr_conds (insn) == CONDS_USE
8085 && get_attr_type (insn) != TYPE_BRANCH)
8091 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8092 || get_attr_conds (insn) == CONDS_USE)
8102 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8103 It corrects the value of COST based on the relationship between
8104 INSN and DEP through the dependence LINK. It returns the new
8105 value. There is a per-core adjust_cost hook to adjust scheduler costs
8106 and the per-core hook can choose to completely override the generic
8107 adjust_cost function. Only put bits of code into arm_adjust_cost that
8108 are common across all cores. */
8110 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8114 /* When generating Thumb-1 code, we want to place flag-setting operations
8115 close to a conditional branch which depends on them, so that we can
8116 omit the comparison. */
8118 && REG_NOTE_KIND (link) == 0
8119 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8120 && recog_memoized (dep) >= 0
8121 && get_attr_conds (dep) == CONDS_SET)
8124 if (current_tune->sched_adjust_cost != NULL)
8126 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8130 /* XXX This is not strictly true for the FPA. */
8131 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8132 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8135 /* Call insns don't incur a stall, even if they follow a load. */
8136 if (REG_NOTE_KIND (link) == 0
8137 && GET_CODE (insn) == CALL_INSN)
8140 if ((i_pat = single_set (insn)) != NULL
8141 && GET_CODE (SET_SRC (i_pat)) == MEM
8142 && (d_pat = single_set (dep)) != NULL
8143 && GET_CODE (SET_DEST (d_pat)) == MEM)
8145 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
/* This is a load after a store; there is no conflict if the load reads
from a cached area. Assume that loads from the stack, and from the
constant pool are cached, and that others will miss. This is a
guess. */
8151 if ((GET_CODE (src_mem) == SYMBOL_REF
8152 && CONSTANT_POOL_ADDRESS_P (src_mem))
8153 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8154 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8155 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8162 static int fp_consts_inited = 0;
8164 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8165 static const char * const strings_fp[8] =
8168 "4", "5", "0.5", "10"
8171 static REAL_VALUE_TYPE values_fp[8];
8174 init_fp_table (void)
if (TARGET_VFP)
fp_consts_inited = 1;
else
fp_consts_inited = 8;
8184 for (i = 0; i < fp_consts_inited; i++)
8186 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8191 /* Return TRUE if rtx X is a valid immediate FP constant. */
8193 arm_const_double_rtx (rtx x)
8198 if (!fp_consts_inited)
8201 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8202 if (REAL_VALUE_MINUS_ZERO (r))
8205 for (i = 0; i < fp_consts_inited; i++)
8206 if (REAL_VALUES_EQUAL (r, values_fp[i]))
/* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8214 neg_const_double_rtx_ok_for_fpa (rtx x)
8219 if (!fp_consts_inited)
8222 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8223 r = real_value_negate (&r);
8224 if (REAL_VALUE_MINUS_ZERO (r))
8227 for (i = 0; i < 8; i++)
8228 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8235 /* VFPv3 has a fairly wide range of representable immediates, formed from
8236 "quarter-precision" floating-point values. These can be evaluated using this
formula (with ^ for exponentiation):

-1^s * n * 2^-r
8241 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8242 16 <= n <= 31 and 0 <= r <= 7.
8244 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8246 - A (most-significant) is the sign bit.
8247 - BCD are the exponent (encoded as r XOR 3).
8248 - EFGH are the mantissa (encoded as n - 16).
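For example, 1.0 == -1^0 * 16 * 2^-4, i.e. s == 0, n == 16, r == 4,
which encodes as ABCDEFGH == 0 111 0000, i.e. 0x70. */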
8251 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8252 fconst[sd] instruction, or -1 if X isn't suitable. */
8254 vfp3_const_double_index (rtx x)
8256 REAL_VALUE_TYPE r, m;
8258 unsigned HOST_WIDE_INT mantissa, mant_hi;
8259 unsigned HOST_WIDE_INT mask;
8260 HOST_WIDE_INT m1, m2;
8261 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8263 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8266 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8268 /* We can't represent these things, so detect them first. */
8269 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8272 /* Extract sign, exponent and mantissa. */
8273 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8274 r = real_value_abs (&r);
8275 exponent = REAL_EXP (&r);
8276 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8277 highest (sign) bit, with a fixed binary point at bit point_pos.
8278 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8279 bits for the mantissa, this may fail (low bits would be lost). */
8280 real_ldexp (&m, &r, point_pos - exponent);
8281 REAL_VALUE_TO_INT (&m1, &m2, m);
8285 /* If there are bits set in the low part of the mantissa, we can't
8286 represent this value. */
8290 /* Now make it so that mantissa contains the most-significant bits, and move
the point_pos to indicate that the least-significant bits have been
discarded. */
8293 point_pos -= HOST_BITS_PER_WIDE_INT;
8296 /* We can permit four significant bits of mantissa only, plus a high bit
8297 which is always 1. */
8298 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8299 if ((mantissa & mask) != 0)
8302 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8303 mantissa >>= point_pos - 5;
8305 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8306 floating-point immediate zero with Neon using an integer-zero load, but
8307 that case is handled elsewhere.) */
8311 gcc_assert (mantissa >= 16 && mantissa <= 31);
8313 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8314 normalized significands are in the range [1, 2). (Our mantissa is shifted
8315 left 4 places at this point relative to normalized IEEE754 values). GCC
8316 internally uses [0.5, 1) (see real.c), so the exponent returned from
8317 REAL_EXP must be altered. */
8318 exponent = 5 - exponent;
8320 if (exponent < 0 || exponent > 7)
8323 /* Sign, mantissa and exponent are now in the correct form to plug into the
8324 formula described in the comment above. */
8325 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8328 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8330 vfp3_const_double_rtx (rtx x)
8335 return vfp3_const_double_index (x) != -1;
8338 /* Recognize immediates which can be used in various Neon instructions. Legal
8339 immediates are described by the following table (for VMVN variants, the
8340 bitwise inverse of the constant shown is recognized. In either case, VMOV
8341 is output and the correct instruction to use for a given constant is chosen
8342 by the assembler). The constant shown is replicated across all elements of
8343 the destination vector.
8345 insn elems variant constant (binary)
8346 ---- ----- ------- -----------------
8347 vmov i32 0 00000000 00000000 00000000 abcdefgh
8348 vmov i32 1 00000000 00000000 abcdefgh 00000000
8349 vmov i32 2 00000000 abcdefgh 00000000 00000000
8350 vmov i32 3 abcdefgh 00000000 00000000 00000000
8351 vmov i16 4 00000000 abcdefgh
8352 vmov i16 5 abcdefgh 00000000
8353 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8354 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8355 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8356 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8357 vmvn i16 10 00000000 abcdefgh
8358 vmvn i16 11 abcdefgh 00000000
8359 vmov i32 12 00000000 00000000 abcdefgh 11111111
8360 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8361 vmov i32 14 00000000 abcdefgh 11111111 11111111
vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8365 eeeeeeee ffffffff gggggggg hhhhhhhh
8366 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8368 For case 18, B = !b. Representable values are exactly those accepted by
vfp3_const_double_index, but are output as floating-point numbers rather
than integers.
8372 Variants 0-5 (inclusive) may also be used as immediates for the second
8373 operand of VORR/VBIC instructions.
8375 The INVERSE argument causes the bitwise inverse of the given operand to be
8376 recognized instead (used for recognizing legal immediates for the VAND/VORN
8377 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8378 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8379 output, rather than the real insns vbic/vorr).
8381 INVERSE makes no difference to the recognition of float vectors.
8383 The return value is the variant of immediate as shown in the above table, or
8384 -1 if the given value doesn't match any of the listed patterns.
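For example, a V4SI constant with all four elements equal to 0x0000AB00
matches variant 1 above, with abcdefgh == 0xAB. */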
8387 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8388 rtx *modconst, int *elementwidth)
8390 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8392 for (i = 0; i < idx; i += (STRIDE)) \
8397 immtype = (CLASS); \
8398 elsize = (ELSIZE); \
8402 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8403 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8404 unsigned char bytes[16];
8405 int immtype = -1, matches;
8406 unsigned int invmask = inverse ? 0xff : 0;
8408 /* Vectors of float constants. */
8409 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8411 rtx el0 = CONST_VECTOR_ELT (op, 0);
8414 if (!vfp3_const_double_rtx (el0))
8417 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8419 for (i = 1; i < n_elts; i++)
8421 rtx elt = CONST_VECTOR_ELT (op, i);
8424 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8426 if (!REAL_VALUES_EQUAL (r0, re))
8431 *modconst = CONST_VECTOR_ELT (op, 0);
8439 /* Splat vector constant out into a byte vector. */
8440 for (i = 0; i < n_elts; i++)
8442 rtx el = CONST_VECTOR_ELT (op, i);
8443 unsigned HOST_WIDE_INT elpart;
8444 unsigned int part, parts;
8446 if (GET_CODE (el) == CONST_INT)
8448 elpart = INTVAL (el);
8451 else if (GET_CODE (el) == CONST_DOUBLE)
8453 elpart = CONST_DOUBLE_LOW (el);
8459 for (part = 0; part < parts; part++)
8462 for (byte = 0; byte < innersize; byte++)
8464 bytes[idx++] = (elpart & 0xff) ^ invmask;
8465 elpart >>= BITS_PER_UNIT;
8467 if (GET_CODE (el) == CONST_DOUBLE)
8468 elpart = CONST_DOUBLE_HIGH (el);
8473 gcc_assert (idx == GET_MODE_SIZE (mode));
8477 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8478 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8480 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8481 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8483 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8484 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8486 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8487 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8489 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8491 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8493 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8494 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8496 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8497 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8499 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8500 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8502 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8503 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8505 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8507 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8509 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8510 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8512 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8513 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8515 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8516 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8518 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8519 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8521 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8523 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8524 && bytes[i] == bytes[(i + 8) % idx]);
8532 *elementwidth = elsize;
8536 unsigned HOST_WIDE_INT imm = 0;
8538 /* Un-invert bytes of recognized vector, if necessary. */
8540 for (i = 0; i < idx; i++)
8541 bytes[i] ^= invmask;
8545 /* FIXME: Broken on 32-bit H_W_I hosts. */
8546 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8548 for (i = 0; i < 8; i++)
8549 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8550 << (i * BITS_PER_UNIT);
8552 *modconst = GEN_INT (imm);
8556 unsigned HOST_WIDE_INT imm = 0;
8558 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8559 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8561 *modconst = GEN_INT (imm);
8569 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8570 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8571 float elements), and a modified constant (whatever should be output for a
8572 VMOV) in *MODCONST. */
8575 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8576 rtx *modconst, int *elementwidth)
8580 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8586 *modconst = tmpconst;
8589 *elementwidth = tmpwidth;
8594 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8595 the immediate is valid, write a constant suitable for using as an operand
8596 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8597 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8600 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8601 rtx *modconst, int *elementwidth)
8605 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8607 if (retval < 0 || retval > 5)
8611 *modconst = tmpconst;
8614 *elementwidth = tmpwidth;
/* Return a string suitable for output of Neon immediate logic operation
MNEM. */
8623 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8624 int inverse, int quad)
8626 int width, is_valid;
8627 static char templ[40];
8629 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8631 gcc_assert (is_valid != 0);
8634 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8636 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8641 /* Output a sequence of pairwise operations to implement a reduction.
8642 NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go. Unfortunately I don't think we can
exploit those extra calculations to do the full operation in fewer steps.
8645 Although all vector elements of the result but the first are ignored, we
8646 actually calculate the same result in each of the elements. An alternative
8647 such as initially loading a vector with zero to use as each of the second
8648 operands would use up an additional register and take an extra instruction,
8649 for no particular gain. */
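/* For example, a pairwise sum reduction of {a, b, c, d} takes two steps:
the first produces {a+b, c+d, ...}, the second {a+b+c+d, ...}; only
element 0 of the final vector is used. */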
8652 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8653 rtx (*reduc) (rtx, rtx, rtx))
8655 enum machine_mode inner = GET_MODE_INNER (mode);
8656 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8659 for (i = parts / 2; i >= 1; i /= 2)
8661 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8662 emit_insn (reduc (dest, tmpsum, tmpsum));
8667 /* If VALS is a vector constant that can be loaded into a register
8668 using VDUP, generate instructions to do so and return an RTX to
8669 assign to the register. Otherwise return NULL_RTX. */
8672 neon_vdup_constant (rtx vals)
8674 enum machine_mode mode = GET_MODE (vals);
8675 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8676 int n_elts = GET_MODE_NUNITS (mode);
8677 bool all_same = true;
8681 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8684 for (i = 0; i < n_elts; ++i)
8686 x = XVECEXP (vals, 0, i);
8687 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8692 /* The elements are not all the same. We could handle repeating
8693 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
{0, C, 0, C, 0, C, 0, C} which can be loaded using
vmov.i16). */
8698 /* We can load this constant by using VDUP and a constant in a
single ARM register. This will be cheaper than a vector
load. */
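/* E.g. the V2SI constant {7, 7} becomes one move of 7 into an ARM
register followed by a single vdup.32 of that register. */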
8702 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8703 return gen_rtx_VEC_DUPLICATE (mode, x);
8706 /* Generate code to load VALS, which is a PARALLEL containing only
8707 constants (for vec_init) or CONST_VECTOR, efficiently into a
8708 register. Returns an RTX to copy into the register, or NULL_RTX
8709 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8712 neon_make_constant (rtx vals)
8714 enum machine_mode mode = GET_MODE (vals);
8716 rtx const_vec = NULL_RTX;
8717 int n_elts = GET_MODE_NUNITS (mode);
8721 if (GET_CODE (vals) == CONST_VECTOR)
8723 else if (GET_CODE (vals) == PARALLEL)
8725 /* A CONST_VECTOR must contain only CONST_INTs and
8726 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8727 Only store valid constants in a CONST_VECTOR. */
8728 for (i = 0; i < n_elts; ++i)
8730 rtx x = XVECEXP (vals, 0, i);
8731 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8734 if (n_const == n_elts)
8735 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8740 if (const_vec != NULL
8741 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8742 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8744 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8745 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8746 pipeline cycle; creating the constant takes one or two ARM
8749 else if (const_vec != NULL_RTX)
8750 /* Load from constant pool. On Cortex-A8 this takes two cycles
8751 (for either double or quad vectors). We can not take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode. */
8756 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8757 We can not construct an initializer. */
8761 /* Initialize vector TARGET to VALS. */
8764 neon_expand_vector_init (rtx target, rtx vals)
8766 enum machine_mode mode = GET_MODE (target);
8767 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8768 int n_elts = GET_MODE_NUNITS (mode);
8769 int n_var = 0, one_var = -1;
8770 bool all_same = true;
8774 for (i = 0; i < n_elts; ++i)
8776 x = XVECEXP (vals, 0, i);
8777 if (!CONSTANT_P (x))
8778 ++n_var, one_var = i;
8780 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8786 rtx constant = neon_make_constant (vals);
8787 if (constant != NULL_RTX)
8789 emit_move_insn (target, constant);
8794 /* Splat a single non-constant element if we can. */
8795 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8797 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8798 emit_insn (gen_rtx_SET (VOIDmode, target,
8799 gen_rtx_VEC_DUPLICATE (mode, x)));
8803 /* One field is non-constant. Load constant then overwrite varying
8804 field. This is more efficient than using the stack. */
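/* For example, initializing {x, 1, 2, 3} loads the constant vector
{1, 1, 2, 3} first and then inserts x into lane 0. */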
8807 rtx copy = copy_rtx (vals);
8808 rtx index = GEN_INT (one_var);
/* Load constant part of vector, substitute neighboring value for
varying element. */
8812 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8813 neon_expand_vector_init (target, copy);
8815 /* Insert variable. */
8816 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8820 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8823 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8826 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8829 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8832 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8835 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8838 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8841 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8844 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8852 /* Construct the vector in memory one field at a time
8853 and load the whole vector. */
8854 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8855 for (i = 0; i < n_elts; i++)
8856 emit_move_insn (adjust_address_nv (mem, inner_mode,
8857 i * GET_MODE_SIZE (inner_mode)),
8858 XVECEXP (vals, 0, i));
8859 emit_move_insn (target, mem);
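/* As a worked example of the strategies above: for a V4SI target,
   { x, x, x, x } with non-constant x becomes a single VDUP;
   { 1, 2, 3, x } is first materialized as the constant vector
   { 1, 2, 3, 1 } (the variable lane borrows its neighbour's value)
   and x is then inserted into lane 3 with vset_lane; a fully
   variable initializer is built in the stack temporary one element
   at a time and loaded with a single vector move. */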
8862 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8863 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8864 reported source locations are bogus. */
8867 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, const char *err)
8872 gcc_assert (GET_CODE (operand) == CONST_INT);
8874 lane = INTVAL (operand);
8876 if (lane < low || lane >= high)
8880 /* Bounds-check lanes. */
8883 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8885 bounds_check (operand, low, high, "lane out of range");
8888 /* Bounds-check constants. */
8891 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8893 bounds_check (operand, low, high, "constant out of range");
8897 neon_element_bits (enum machine_mode mode)
8900 return GET_MODE_BITSIZE (mode);
8902 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8906 /* Predicates for `match_operand' and `match_operator'. */
8908 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8910 cirrus_memory_offset (rtx op)
8912 /* Reject eliminable registers. */
8913 if (! (reload_in_progress || reload_completed)
8914 && ( reg_mentioned_p (frame_pointer_rtx, op)
8915 || reg_mentioned_p (arg_pointer_rtx, op)
8916 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8917 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8918 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8919 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8922 if (GET_CODE (op) == MEM)
8928 /* Match: (mem (reg)). */
8929 if (GET_CODE (ind) == REG)
8935 if (GET_CODE (ind) == PLUS
8936 && GET_CODE (XEXP (ind, 0)) == REG
8937 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8938 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8945 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8946 WB is true if full writeback address modes are allowed and is false
8947 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
8951 arm_coproc_mem_operand (rtx op, bool wb)
8955 /* Reject eliminable registers. */
8956 if (! (reload_in_progress || reload_completed)
8957 && ( reg_mentioned_p (frame_pointer_rtx, op)
8958 || reg_mentioned_p (arg_pointer_rtx, op)
8959 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8960 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8961 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8962 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8965 /* Constants are converted into offsets from labels. */
8966 if (GET_CODE (op) != MEM)
8971 if (reload_completed
8972 && (GET_CODE (ind) == LABEL_REF
8973 || (GET_CODE (ind) == CONST
8974 && GET_CODE (XEXP (ind, 0)) == PLUS
8975 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8976 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8979 /* Match: (mem (reg)). */
8980 if (GET_CODE (ind) == REG)
8981 return arm_address_register_rtx_p (ind, 0);
8983 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8984 acceptable in any case (subject to verification by
8985 arm_address_register_rtx_p). We need WB to be true to accept
8986 PRE_INC and POST_DEC. */
8987 if (GET_CODE (ind) == POST_INC
8988 || GET_CODE (ind) == PRE_DEC
8990 && (GET_CODE (ind) == PRE_INC
8991 || GET_CODE (ind) == POST_DEC)))
8992 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8995 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8996 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8997 && GET_CODE (XEXP (ind, 1)) == PLUS
8998 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8999 ind = XEXP (ind, 1);
9004 if (GET_CODE (ind) == PLUS
9005 && GET_CODE (XEXP (ind, 0)) == REG
9006 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9007 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9008 && INTVAL (XEXP (ind, 1)) > -1024
9009 && INTVAL (XEXP (ind, 1)) < 1024
9010 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9016 /* Return TRUE if OP is a memory operand which we can load or store a vector
9017 to/from. TYPE is one of the following values:
9018 0 - Vector load/store (vldr)
9019 1 - Core registers (ldm)
9020 2 - Element/structure loads (vld1) */
9023 neon_vector_mem_operand (rtx op, int type)
9027 /* Reject eliminable registers. */
9028 if (! (reload_in_progress || reload_completed)
9029 && ( reg_mentioned_p (frame_pointer_rtx, op)
9030 || reg_mentioned_p (arg_pointer_rtx, op)
9031 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9032 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9033 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9034 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9037 /* Constants are converted into offsets from labels. */
9038 if (GET_CODE (op) != MEM)
9043 if (reload_completed
9044 && (GET_CODE (ind) == LABEL_REF
9045 || (GET_CODE (ind) == CONST
9046 && GET_CODE (XEXP (ind, 0)) == PLUS
9047 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9048 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9051 /* Match: (mem (reg)). */
9052 if (GET_CODE (ind) == REG)
9053 return arm_address_register_rtx_p (ind, 0);
9055 /* Allow post-increment with Neon registers. */
9056 if ((type != 1 && GET_CODE (ind) == POST_INC)
9057 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9058 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9060 /* FIXME: vld1 allows register post-modify. */
9066 && GET_CODE (ind) == PLUS
9067 && GET_CODE (XEXP (ind, 0)) == REG
9068 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9069 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9070 && INTVAL (XEXP (ind, 1)) > -1024
9071 && INTVAL (XEXP (ind, 1)) < 1016
9072 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9078 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
9081 neon_struct_mem_operand (rtx op)
9085 /* Reject eliminable registers. */
9086 if (! (reload_in_progress || reload_completed)
9087 && ( reg_mentioned_p (frame_pointer_rtx, op)
9088 || reg_mentioned_p (arg_pointer_rtx, op)
9089 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9090 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9091 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9092 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9095 /* Constants are converted into offsets from labels. */
9096 if (GET_CODE (op) != MEM)
9101 if (reload_completed
9102 && (GET_CODE (ind) == LABEL_REF
9103 || (GET_CODE (ind) == CONST
9104 && GET_CODE (XEXP (ind, 0)) == PLUS
9105 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9106 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9109 /* Match: (mem (reg)). */
9110 if (GET_CODE (ind) == REG)
9111 return arm_address_register_rtx_p (ind, 0);
9113 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9114 if (GET_CODE (ind) == POST_INC
9115 || GET_CODE (ind) == PRE_DEC)
9116 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
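/* For example, (mem (reg r4)), (mem (post_inc (reg r4))) and
   (mem (pre_dec (reg r4))) are all accepted above, whereas
   (mem (plus (reg r4) (const_int 8))) is rejected: vldm/vstm
   has no immediate-offset addressing form. */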
9121 /* Return true if X is a register that will be eliminated later on. */
9123 arm_eliminable_register (rtx x)
9125 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9126 || REGNO (x) == ARG_POINTER_REGNUM
9127 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9128 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9131 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9132 coprocessor registers. Otherwise return NO_REGS. */
9135 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9139 if (!TARGET_NEON_FP16)
9140 return GENERAL_REGS;
9141 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9143 return GENERAL_REGS;
9146 /* The neon move patterns handle all legitimate vector and struct addresses. */
9150 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9151 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9152 || VALID_NEON_STRUCT_MODE (mode)))
9155 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9158 return GENERAL_REGS;
9161 /* Values which must be returned in the most-significant end of the return register. */
9165 arm_return_in_msb (const_tree valtype)
9167 return (TARGET_AAPCS_BASED
9169 && (AGGREGATE_TYPE_P (valtype)
9170 || TREE_CODE (valtype) == COMPLEX_TYPE));
9173 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9174 Used by the Cirrus Maverick code, which has to work around
9175 a hardware bug triggered by such instructions. */
9177 arm_memory_load_p (rtx insn)
9179 rtx body, lhs, rhs;
9181 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9184 body = PATTERN (insn);
9186 if (GET_CODE (body) != SET)
9189 lhs = XEXP (body, 0);
9190 rhs = XEXP (body, 1);
9192 lhs = REG_OR_SUBREG_RTX (lhs);
9194 /* If the destination is not a general purpose
9195 register we do not have to worry. */
9196 if (GET_CODE (lhs) != REG
9197 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9200 /* As well as loads from memory we also have to react
9201 to loads of invalid constants which will be turned
9202 into loads from the minipool. */
9203 return (GET_CODE (rhs) == MEM
9204 || GET_CODE (rhs) == SYMBOL_REF
9205 || note_invalid_constants (insn, -1, false));
9208 /* Return TRUE if INSN is a Cirrus instruction. */
9210 arm_cirrus_insn_p (rtx insn)
9212 enum attr_cirrus attr;
9214 /* get_attr cannot accept USE or CLOBBER. */
9216 || GET_CODE (insn) != INSN
9217 || GET_CODE (PATTERN (insn)) == USE
9218 || GET_CODE (PATTERN (insn)) == CLOBBER)
9221 attr = get_attr_cirrus (insn);
9223 return attr != CIRRUS_NOT;
9226 /* Cirrus reorg for invalid instruction combinations. */
9228 cirrus_reorg (rtx first)
9230 enum attr_cirrus attr;
9231 rtx body = PATTERN (first);
9235 /* Any branch must be followed by 2 non-Cirrus instructions. */
9236 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9239 t = next_nonnote_insn (first);
9241 if (arm_cirrus_insn_p (t))
9244 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9248 emit_insn_after (gen_nop (), first);
9253 /* (float (blah)) is in parallel with a clobber. */
9254 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9255 body = XVECEXP (body, 0, 0);
9257 if (GET_CODE (body) == SET)
9259 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9261 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9262 be followed by a non-Cirrus insn. */
9263 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9265 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9266 emit_insn_after (gen_nop (), first);
9270 else if (arm_memory_load_p (first))
9272 unsigned int arm_regno;
9274 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9275 ldr/cfmv64hr combination where the Rd field is the same
9276 in both instructions must be split with a non-Cirrus insn. */
9283 /* Get Arm register number for ldr insn. */
9284 if (GET_CODE (lhs) == REG)
9285 arm_regno = REGNO (lhs);
9288 gcc_assert (GET_CODE (rhs) == REG);
9289 arm_regno = REGNO (rhs);
9293 first = next_nonnote_insn (first);
9295 if (! arm_cirrus_insn_p (first))
9298 body = PATTERN (first);
9300 /* (float (blah)) is in parallel with a clobber. */
9301 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9302 body = XVECEXP (body, 0, 0);
9304 if (GET_CODE (body) == FLOAT)
9305 body = XEXP (body, 0);
9307 if (get_attr_cirrus (first) == CIRRUS_MOVE
9308 && GET_CODE (XEXP (body, 1)) == REG
9309 && arm_regno == REGNO (XEXP (body, 1)))
9310 emit_insn_after (gen_nop (), first);
9316 /* get_attr cannot accept USE or CLOBBER. */
9318 || GET_CODE (first) != INSN
9319 || GET_CODE (PATTERN (first)) == USE
9320 || GET_CODE (PATTERN (first)) == CLOBBER)
9323 attr = get_attr_cirrus (first);
9325 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9326 must be followed by a non-coprocessor instruction. */
9327 if (attr == CIRRUS_COMPARE)
9331 t = next_nonnote_insn (first);
9333 if (arm_cirrus_insn_p (t))
9336 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9340 emit_insn_after (gen_nop (), first);
9346 /* Return TRUE if X references a SYMBOL_REF. */
9348 symbol_mentioned_p (rtx x)
9353 if (GET_CODE (x) == SYMBOL_REF)
9356 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9357 are constant offsets, not symbols. */
9358 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9361 fmt = GET_RTX_FORMAT (GET_CODE (x));
9363 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9369 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9370 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9373 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9380 /* Return TRUE if X references a LABEL_REF. */
9382 label_mentioned_p (rtx x)
9387 if (GET_CODE (x) == LABEL_REF)
9390 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9391 instruction, but they are constant offsets, not symbols. */
9392 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9395 fmt = GET_RTX_FORMAT (GET_CODE (x));
9396 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9402 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9403 if (label_mentioned_p (XVECEXP (x, i, j)))
9406 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9414 tls_mentioned_p (rtx x)
9416 switch (GET_CODE (x))
9419 return tls_mentioned_p (XEXP (x, 0));
9422 if (XINT (x, 1) == UNSPEC_TLS)
9430 /* Must not copy any rtx that uses a pc-relative address. */
9433 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9435 if (GET_CODE (*x) == UNSPEC
9436 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9442 arm_cannot_copy_insn_p (rtx insn)
9444 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9450 enum rtx_code code = GET_CODE (x);
9467 /* Return 1 if memory locations are adjacent. */
9469 adjacent_mem_locations (rtx a, rtx b)
9471 /* We don't guarantee to preserve the order of these memory refs. */
9472 if (volatile_refs_p (a) || volatile_refs_p (b))
9475 if ((GET_CODE (XEXP (a, 0)) == REG
9476 || (GET_CODE (XEXP (a, 0)) == PLUS
9477 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9478 && (GET_CODE (XEXP (b, 0)) == REG
9479 || (GET_CODE (XEXP (b, 0)) == PLUS
9480 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9482 HOST_WIDE_INT val0 = 0, val1 = 0;
9486 if (GET_CODE (XEXP (a, 0)) == PLUS)
9488 reg0 = XEXP (XEXP (a, 0), 0);
9489 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9494 if (GET_CODE (XEXP (b, 0)) == PLUS)
9496 reg1 = XEXP (XEXP (b, 0), 0);
9497 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9502 /* Don't accept any offset that will require multiple
9503 instructions to handle, since this would cause the
9504 arith_adjacentmem pattern to output an overlong sequence. */
9505 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9508 /* Don't allow an eliminable register: register elimination can make
9509 the offset too large. */
9510 if (arm_eliminable_register (reg0))
9513 val_diff = val1 - val0;
9517 /* If the target has load delay slots, then there's no benefit
9518 to using an ldm instruction unless the offset is zero and
9519 we are optimizing for size. */
9520 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9521 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9522 && (val_diff == 4 || val_diff == -4));
9525 return ((REGNO (reg0) == REGNO (reg1))
9526 && (val_diff == 4 || val_diff == -4));
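/* For example, (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) are adjacent under the test
   above: the base registers match and the offsets differ by exactly
   4, so the pair is a candidate for a single ldm/stm. */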
9532 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9533 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9534 instruction. ADD_OFFSET is nonzero if the base address register needs
9535 to be modified with an add instruction before we can use it. */
9538 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9539 int nops, HOST_WIDE_INT add_offset)
9541 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9542 if the offset isn't small enough. The reason 2 ldrs are faster
9543 is because these ARMs are able to do more than one cache access
9544 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9545 whilst the ARM8 has a double bandwidth cache. This means that
9546 these cores can do both an instruction fetch and a data fetch in
9547 a single cycle, so the trick of calculating the address into a
9548 scratch register (one of the result regs) and then doing a load
9549 multiple actually becomes slower (and no smaller in code size).
9550 That is the transformation
9552 ldr rd1, [rbase + offset]
9553 ldr rd2, [rbase + offset + 4]
9557 add rd1, rbase, offset
9558 ldmia rd1, {rd1, rd2}
9560 produces worse code -- '3 cycles + any stalls on rd2' instead of
9561 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9562 access per cycle, the first sequence could never complete in less
9563 than 6 cycles, whereas the ldm sequence would only take 5 and
9564 would make better use of sequential accesses if not hitting the
9567 We cheat here and test 'arm_ld_sched' which we currently know to
9568 only be true for the ARM8, ARM9 and StrongARM. If this ever
9569 changes, then the test below needs to be reworked. */
9570 if (nops == 2 && arm_ld_sched && add_offset != 0)
9573 /* XScale has load-store double instructions, but they have stricter
9574 alignment requirements than load-store multiple, so we cannot
9577 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9578 the pipeline until completion.
9586 An ldr instruction takes 1-3 cycles, but does not block the
9595 Best case ldr will always win. However, the more ldr instructions
9596 we issue, the less likely we are to be able to schedule them well.
9597 Using ldr instructions also increases code size.
9599 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9600 for counts of 3 or 4 regs. */
9601 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9606 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9607 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9608 an array ORDER which describes the sequence to use when accessing the
9609 offsets that produces an ascending order. In this sequence, each
9610 offset must be larger by exactly 4 than the previous one. ORDER[0]
9611 must have been filled in with the lowest offset by the caller.
9612 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9613 we use to verify that ORDER produces an ascending order of registers.
9614 Return true if it was possible to construct such an order, false if
9618 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9622 for (i = 1; i < nops; i++)
9626 order[i] = order[i - 1];
9627 for (j = 0; j < nops; j++)
9628 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9630 /* We must find exactly one offset that is higher than the
9631 previous one by 4. */
9632 if (order[i] != order[i - 1])
9636 if (order[i] == order[i - 1])
9638 /* The register numbers must be ascending. */
9639 if (unsorted_regs != NULL
9640 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
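/* For example, given unsorted_offsets = { 8, 0, 12, 4 } and
   order[0] = 1 (the index of the lowest offset, 0), the loop above
   finds offsets 4, 8 and 12 in turn, producing order = { 1, 3, 0, 2 }.
   A missing or duplicated offset causes the function to return
   false. */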
9646 /* Used to determine in a peephole whether a sequence of load
9647 instructions can be changed into a load-multiple instruction.
9648 NOPS is the number of separate load instructions we are examining. The
9649 first NOPS entries in OPERANDS are the destination registers, the
9650 next NOPS entries are memory operands. If this function is
9651 successful, *BASE is set to the common base register of the memory
9652 accesses; *LOAD_OFFSET is set to the first memory location's offset
9653 from that base register.
9654 REGS is an array filled in with the destination register numbers.
9655 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9656 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9657 the sequence of registers in REGS matches the loads from ascending memory
9658 locations, and the function verifies that the register numbers are
9659 themselves ascending. If CHECK_REGS is false, the register numbers
9660 are stored in the order they are found in the operands. */
9662 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9663 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9665 int unsorted_regs[MAX_LDM_STM_OPS];
9666 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9667 int order[MAX_LDM_STM_OPS];
9668 rtx base_reg_rtx = NULL;
9672 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9673 easily extended if required. */
9674 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9676 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9678 /* Loop over the operands and check that the memory references are
9679 suitable (i.e. immediate offsets from the same base register). At
9680 the same time, extract the target register, and the memory
9682 for (i = 0; i < nops; i++)
9687 /* Convert a subreg of a mem into the mem itself. */
9688 if (GET_CODE (operands[nops + i]) == SUBREG)
9689 operands[nops + i] = alter_subreg (operands + (nops + i));
9691 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9693 /* Don't reorder volatile memory references; it doesn't seem worth
9694 looking for the case where the order is ok anyway. */
9695 if (MEM_VOLATILE_P (operands[nops + i]))
9698 offset = const0_rtx;
9700 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9701 || (GET_CODE (reg) == SUBREG
9702 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9703 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9704 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9706 || (GET_CODE (reg) == SUBREG
9707 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9708 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9713 base_reg = REGNO (reg);
9715 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9718 else if (base_reg != (int) REGNO (reg))
9719 /* Not addressed from the same base register. */
9722 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9723 ? REGNO (operands[i])
9724 : REGNO (SUBREG_REG (operands[i])));
9726 /* If it isn't an integer register, or if it overwrites the
9727 base register but isn't the last insn in the list, then
9728 we can't do this. */
9729 if (unsorted_regs[i] < 0
9730 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9731 || unsorted_regs[i] > 14
9732 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9735 unsorted_offsets[i] = INTVAL (offset);
9736 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9740 /* Not a suitable memory address. */
9744 /* All the useful information has now been extracted from the
9745 operands into unsorted_regs and unsorted_offsets; additionally,
9746 order[0] has been set to the lowest offset in the list. Sort
9747 the offsets into order, verifying that they are adjacent, and
9748 check that the register numbers are ascending. */
9749 if (!compute_offset_order (nops, unsorted_offsets, order,
9750 check_regs ? unsorted_regs : NULL))
9754 memcpy (saved_order, order, sizeof order);
9760 for (i = 0; i < nops; i++)
9761 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9763 *load_offset = unsorted_offsets[order[0]];
9767 && !peep2_reg_dead_p (nops, base_reg_rtx))
9770 if (unsorted_offsets[order[0]] == 0)
9771 ldm_case = 1; /* ldmia */
9772 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9773 ldm_case = 2; /* ldmib */
9774 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9775 ldm_case = 3; /* ldmda */
9776 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9777 ldm_case = 4; /* ldmdb */
9778 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9779 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9784 if (!multiple_operation_profitable_p (false, nops,
9786 ? unsorted_offsets[order[0]] : 0))
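/* For example, loads at offsets { 0, 4, 8, 12 } from a common base
   register give ldm_case 1 (ldmia); { 4, 8, 12, 16 } gives case 2
   (ldmib); { -12, -8, -4, 0 } gives case 3 (ldmda); and
   { -16, -12, -8, -4 } gives case 4 (ldmdb). */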
9792 /* Used to determine in a peephole whether a sequence of store instructions can
9793 be changed into a store-multiple instruction.
9794 NOPS is the number of separate store instructions we are examining.
9795 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9797 The first NOPS entries in OPERANDS are the source registers, the next
9798 NOPS entries are memory operands. If this function is successful, *BASE is
9799 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9800 to the first memory location's offset from that base register. REGS is an
9801 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9802 likewise filled with the corresponding rtx's.
9803 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9804 numbers to an ascending order of stores.
9805 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9806 from ascending memory locations, and the function verifies that the register
9807 numbers are themselves ascending. If CHECK_REGS is false, the register
9808 numbers are stored in the order they are found in the operands. */
9810 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9811 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9812 HOST_WIDE_INT *load_offset, bool check_regs)
9814 int unsorted_regs[MAX_LDM_STM_OPS];
9815 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9816 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9817 int order[MAX_LDM_STM_OPS];
9819 rtx base_reg_rtx = NULL;
9822 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9823 easily extended if required. */
9824 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9826 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9828 /* Loop over the operands and check that the memory references are
9829 suitable (i.e. immediate offsets from the same base register). At
9830 the same time, extract the target register, and the memory
9832 for (i = 0; i < nops; i++)
9837 /* Convert a subreg of a mem into the mem itself. */
9838 if (GET_CODE (operands[nops + i]) == SUBREG)
9839 operands[nops + i] = alter_subreg (operands + (nops + i));
9841 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9843 /* Don't reorder volatile memory references; it doesn't seem worth
9844 looking for the case where the order is ok anyway. */
9845 if (MEM_VOLATILE_P (operands[nops + i]))
9848 offset = const0_rtx;
9850 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9851 || (GET_CODE (reg) == SUBREG
9852 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9853 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9854 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9856 || (GET_CODE (reg) == SUBREG
9857 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9858 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9861 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9862 ? operands[i] : SUBREG_REG (operands[i]));
9863 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9867 base_reg = REGNO (reg);
9869 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9872 else if (base_reg != (int) REGNO (reg))
9873 /* Not addressed from the same base register. */
9876 /* If it isn't an integer register, then we can't do this. */
9877 if (unsorted_regs[i] < 0
9878 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9879 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9880 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9881 || unsorted_regs[i] > 14)
9884 unsorted_offsets[i] = INTVAL (offset);
9885 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9889 /* Not a suitable memory address. */
9893 /* All the useful information has now been extracted from the
9894 operands into unsorted_regs and unsorted_offsets; additionally,
9895 order[0] has been set to the lowest offset in the list. Sort
9896 the offsets into order, verifying that they are adjacent, and
9897 check that the register numbers are ascending. */
9898 if (!compute_offset_order (nops, unsorted_offsets, order,
9899 check_regs ? unsorted_regs : NULL))
9903 memcpy (saved_order, order, sizeof order);
9909 for (i = 0; i < nops; i++)
9911 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9913 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9916 *load_offset = unsorted_offsets[order[0]];
9920 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9923 if (unsorted_offsets[order[0]] == 0)
9924 stm_case = 1; /* stmia */
9925 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9926 stm_case = 2; /* stmib */
9927 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9928 stm_case = 3; /* stmda */
9929 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9930 stm_case = 4; /* stmdb */
9934 if (!multiple_operation_profitable_p (false, nops, 0))
9940 /* Routines for use in generating RTL. */
9942 /* Generate a load-multiple instruction. COUNT is the number of loads in
9943 the instruction; REGS and MEMS are arrays containing the operands.
9944 BASEREG is the base register to be used in addressing the memory operands.
9945 WBACK_OFFSET is nonzero if the instruction should update the base
9949 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9950 HOST_WIDE_INT wback_offset)
9955 if (!multiple_operation_profitable_p (false, count, 0))
9961 for (i = 0; i < count; i++)
9962 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9964 if (wback_offset != 0)
9965 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9973 result = gen_rtx_PARALLEL (VOIDmode,
9974 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9975 if (wback_offset != 0)
9977 XVECEXP (result, 0, 0)
9978 = gen_rtx_SET (VOIDmode, basereg,
9979 plus_constant (basereg, wback_offset));
9984 for (j = 0; i < count; i++, j++)
9985 XVECEXP (result, 0, i)
9986 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9991 /* Generate a store-multiple instruction. COUNT is the number of stores in
9992 the instruction; REGS and MEMS are arrays containing the operands.
9993 BASEREG is the base register to be used in addressing the memory operands.
9994 WBACK_OFFSET is nonzero if the instruction should update the base
9998 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9999 HOST_WIDE_INT wback_offset)
10004 if (GET_CODE (basereg) == PLUS)
10005 basereg = XEXP (basereg, 0);
10007 if (!multiple_operation_profitable_p (false, count, 0))
10013 for (i = 0; i < count; i++)
10014 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10016 if (wback_offset != 0)
10017 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10019 seq = get_insns ();
10025 result = gen_rtx_PARALLEL (VOIDmode,
10026 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10027 if (wback_offset != 0)
10029 XVECEXP (result, 0, 0)
10030 = gen_rtx_SET (VOIDmode, basereg,
10031 plus_constant (basereg, wback_offset));
10036 for (j = 0; i < count; i++, j++)
10037 XVECEXP (result, 0, i)
10038 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10043 /* Generate either a load-multiple or a store-multiple instruction. This
10044 function can be used in situations where we can start with a single MEM
10045 rtx and adjust its address upwards.
10046 COUNT is the number of operations in the instruction, not counting a
10047 possible update of the base register. REGS is an array containing the
10049 BASEREG is the base register to be used in addressing the memory operands,
10050 which are constructed from BASEMEM.
10051 WRITE_BACK specifies whether the generated instruction should include an
10052 update of the base register.
10053 OFFSETP is used to pass an offset to and from this function; this offset
10054 is not used when constructing the address (instead BASEMEM should have an
10055 appropriate offset in its address), it is used only for setting
10056 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10059 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10060 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10062 rtx mems[MAX_LDM_STM_OPS];
10063 HOST_WIDE_INT offset = *offsetp;
10066 gcc_assert (count <= MAX_LDM_STM_OPS);
10068 if (GET_CODE (basereg) == PLUS)
10069 basereg = XEXP (basereg, 0);
10071 for (i = 0; i < count; i++)
10073 rtx addr = plus_constant (basereg, i * 4);
10074 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10082 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10083 write_back ? 4 * count : 0);
10085 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10086 write_back ? 4 * count : 0);
10090 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10091 rtx basemem, HOST_WIDE_INT *offsetp)
10093 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10098 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10099 rtx basemem, HOST_WIDE_INT *offsetp)
10101 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10105 /* Called from a peephole2 expander to turn a sequence of loads into an
10106 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10107 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10108 is true if we can reorder the registers because they are used commutatively subsequently.
10110 Returns true iff we could generate a new instruction. */
10113 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10115 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10116 rtx mems[MAX_LDM_STM_OPS];
10117 int i, j, base_reg;
10119 HOST_WIDE_INT offset;
10120 int write_back = FALSE;
10124 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10125 &base_reg, &offset, !sort_regs);
10131 for (i = 0; i < nops - 1; i++)
10132 for (j = i + 1; j < nops; j++)
10133 if (regs[i] > regs[j])
10139 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10143 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10144 gcc_assert (ldm_case == 1 || ldm_case == 5);
10150 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10151 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10153 if (!TARGET_THUMB1)
10155 base_reg = regs[0];
10156 base_reg_rtx = newbase;
10160 for (i = 0; i < nops; i++)
10162 addr = plus_constant (base_reg_rtx, offset + i * 4);
10163 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10166 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10167 write_back ? offset + i * 4 : 0));
10171 /* Called from a peephole2 expander to turn a sequence of stores into an
10172 STM instruction. OPERANDS are the operands found by the peephole matcher;
10173 NOPS indicates how many separate stores we are trying to combine.
10174 Returns true iff we could generate a new instruction. */
10177 gen_stm_seq (rtx *operands, int nops)
10180 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10181 rtx mems[MAX_LDM_STM_OPS];
10184 HOST_WIDE_INT offset;
10185 int write_back = FALSE;
10188 bool base_reg_dies;
10190 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10191 mem_order, &base_reg, &offset, true);
10196 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10198 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10201 gcc_assert (base_reg_dies);
10207 gcc_assert (base_reg_dies);
10208 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10212 addr = plus_constant (base_reg_rtx, offset);
10214 for (i = 0; i < nops; i++)
10216 addr = plus_constant (base_reg_rtx, offset + i * 4);
10217 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10220 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10221 write_back ? offset + i * 4 : 0));
10225 /* Called from a peephole2 expander to turn a sequence of stores that are
10226 preceded by constant loads into an STM instruction. OPERANDS are the
10227 operands found by the peephole matcher; NOPS indicates how many
10228 separate stores we are trying to combine; there are 2 * NOPS
10229 instructions in the peephole.
10230 Returns true iff we could generate a new instruction. */
10233 gen_const_stm_seq (rtx *operands, int nops)
10235 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10236 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10237 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10238 rtx mems[MAX_LDM_STM_OPS];
10241 HOST_WIDE_INT offset;
10242 int write_back = FALSE;
10245 bool base_reg_dies;
10247 HARD_REG_SET allocated;
10249 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10250 mem_order, &base_reg, &offset, false);
10255 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10257 /* If the same register is used more than once, try to find a free register. */
10259 CLEAR_HARD_REG_SET (allocated);
10260 for (i = 0; i < nops; i++)
10262 for (j = i + 1; j < nops; j++)
10263 if (regs[i] == regs[j])
10265 rtx t = peep2_find_free_register (0, nops * 2,
10266 TARGET_THUMB1 ? "l" : "r",
10267 SImode, &allocated);
10271 regs[i] = REGNO (t);
10275 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10278 for (i = 0; i < nops; i++)
10279 if (regs[i] < regs[reg_order[0]])
10282 for (i = 1; i < nops; i++)
10284 int this_order = reg_order[i - 1];
10285 for (j = 0; j < nops; j++)
10286 if (regs[j] > regs[reg_order[i - 1]]
10287 && (this_order == reg_order[i - 1]
10288 || regs[j] < regs[this_order]))
10290 reg_order[i] = this_order;
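/* For example, with regs = { 3, 1, 2 } the selection above yields
   reg_order = { 1, 2, 0 }, i.e. the registers visited in ascending
   numerical order 1, 2, 3. */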
10293 /* Ensure that registers that must be live after the instruction end
10294 up with the correct value. */
10295 for (i = 0; i < nops; i++)
10297 int this_order = reg_order[i];
10298 if ((this_order != mem_order[i]
10299 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10300 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10304 /* Load the constants. */
10305 for (i = 0; i < nops; i++)
10307 rtx op = operands[2 * nops + mem_order[i]];
10308 sorted_regs[i] = regs[reg_order[i]];
10309 emit_move_insn (reg_rtxs[reg_order[i]], op);
10312 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10314 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10317 gcc_assert (base_reg_dies);
10323 gcc_assert (base_reg_dies);
10324 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10328 addr = plus_constant (base_reg_rtx, offset);
10330 for (i = 0; i < nops; i++)
10332 addr = plus_constant (base_reg_rtx, offset + i * 4);
10333 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10336 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10337 write_back ? offset + i * 4 : 0));
10342 arm_gen_movmemqi (rtx *operands)
10344 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10345 HOST_WIDE_INT srcoffset, dstoffset;
10347 rtx src, dst, srcbase, dstbase;
10348 rtx part_bytes_reg = NULL;
10351 if (GET_CODE (operands[2]) != CONST_INT
10352 || GET_CODE (operands[3]) != CONST_INT
10353 || INTVAL (operands[2]) > 64
10354 || INTVAL (operands[3]) & 3)
10357 dstbase = operands[0];
10358 srcbase = operands[1];
10360 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10361 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10363 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10364 out_words_to_go = INTVAL (operands[2]) / 4;
10365 last_bytes = INTVAL (operands[2]) & 3;
10366 dstoffset = srcoffset = 0;
10368 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10369 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10371 for (i = 0; in_words_to_go >= 2; i += 4)
10373 if (in_words_to_go > 4)
10374 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10375 TRUE, srcbase, &srcoffset));
10377 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10378 src, FALSE, srcbase,
10381 if (out_words_to_go)
10383 if (out_words_to_go > 4)
10384 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10385 TRUE, dstbase, &dstoffset));
10386 else if (out_words_to_go != 1)
10387 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10388 out_words_to_go, dst,
10391 dstbase, &dstoffset));
10394 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10395 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10396 if (last_bytes != 0)
10398 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10404 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10405 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10408 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10409 if (out_words_to_go)
10413 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10414 sreg = copy_to_reg (mem);
10416 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10417 emit_move_insn (mem, sreg);
10420 gcc_assert (!in_words_to_go); /* Sanity check */
10423 if (in_words_to_go)
10425 gcc_assert (in_words_to_go > 0);
10427 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10428 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10431 gcc_assert (!last_bytes || part_bytes_reg);
10433 if (BYTES_BIG_ENDIAN && last_bytes)
10435 rtx tmp = gen_reg_rtx (SImode);
10437 /* The bytes we want are in the top end of the word. */
10438 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10439 GEN_INT (8 * (4 - last_bytes))));
10440 part_bytes_reg = tmp;
10444 mem = adjust_automodify_address (dstbase, QImode,
10445 plus_constant (dst, last_bytes - 1),
10446 dstoffset + last_bytes - 1);
10447 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10451 tmp = gen_reg_rtx (SImode);
10452 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10453 part_bytes_reg = tmp;
10460 if (last_bytes > 1)
10462 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10463 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10467 rtx tmp = gen_reg_rtx (SImode);
10468 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10469 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10470 part_bytes_reg = tmp;
10477 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10478 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
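/* For example, on a little-endian target a word-aligned 14-byte copy
   loads four registers with one ldm (the fourth register covering
   the trailing bytes), stores the first three with one stm, and
   finishes with a single halfword store from the fourth register. */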
10485 /* Select a dominance comparison mode if possible for a test of the general
10486 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10487 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10488 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10489 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10490 In all cases OP will be either EQ or NE, but we don't need to know which
10491 here. If we are unable to support a dominance comparison we return
10492 CC mode. This will then fail to match for the RTL expressions that
10493 generate this call. */
10495 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10497 enum rtx_code cond1, cond2;
10500 /* Currently we will probably get the wrong result if the individual
10501 comparisons are not simple. This also ensures that it is safe to
10502 reverse a comparison if necessary. */
10503 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10505 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10509 /* The if_then_else variant of this tests the second condition if the
10510 first passes, but is true if the first fails. Reverse the first
10511 condition to get a true "inclusive-or" expression. */
10512 if (cond_or == DOM_CC_NX_OR_Y)
10513 cond1 = reverse_condition (cond1);
10515 /* If the comparisons are not equal, and one doesn't dominate the other,
10516 then we can't do this. */
10518 && !comparison_dominates_p (cond1, cond2)
10519 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10524 enum rtx_code temp = cond1;
10532 if (cond_or == DOM_CC_X_AND_Y)
10537 case EQ: return CC_DEQmode;
10538 case LE: return CC_DLEmode;
10539 case LEU: return CC_DLEUmode;
10540 case GE: return CC_DGEmode;
10541 case GEU: return CC_DGEUmode;
10542 default: gcc_unreachable ();
10546 if (cond_or == DOM_CC_X_AND_Y)
10558 gcc_unreachable ();
10562 if (cond_or == DOM_CC_X_AND_Y)
10574 gcc_unreachable ();
10578 if (cond_or == DOM_CC_X_AND_Y)
10579 return CC_DLTUmode;
10584 return CC_DLTUmode;
10586 return CC_DLEUmode;
10590 gcc_unreachable ();
10594 if (cond_or == DOM_CC_X_AND_Y)
10595 return CC_DGTUmode;
10600 return CC_DGTUmode;
10602 return CC_DGEUmode;
10606 gcc_unreachable ();
10609 /* The remaining cases only occur when both comparisons are the
10612 gcc_assert (cond1 == cond2);
10616 gcc_assert (cond1 == cond2);
10620 gcc_assert (cond1 == cond2);
10624 gcc_assert (cond1 == cond2);
10625 return CC_DLEUmode;
10628 gcc_assert (cond1 == cond2);
10629 return CC_DGEUmode;
10632 gcc_unreachable ();
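/* For example, combining (a == b) && (c == d) for a conditional
   branch passes DOM_CC_X_AND_Y with two EQ comparisons, and the
   switch above returns CC_DEQmode; the compare patterns can then
   implement both tests as a compare followed by a conditional
   compare. */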
10637 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10639 /* All floating point compares return CCFP if it is an equality
10640 comparison, and CCFPE otherwise. */
10641 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10661 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10666 gcc_unreachable ();
10670 /* A compare with a shifted operand. Because of canonicalization, the
10671 comparison will have to be swapped when we emit the assembler. */
10672 if (GET_MODE (y) == SImode
10673 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10674 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10675 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10676 || GET_CODE (x) == ROTATERT))
10679 /* This operation is performed swapped, but since we only rely on the Z
10680 flag we don't need an additional mode. */
10681 if (GET_MODE (y) == SImode
10682 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10683 && GET_CODE (x) == NEG
10684 && (op == EQ || op == NE))
10687 /* This is a special case that is used by combine to allow a
10688 comparison of a shifted byte load to be split into a zero-extend
10689 followed by a comparison of the shifted integer (only valid for
10690 equalities and unsigned inequalities). */
10691 if (GET_MODE (x) == SImode
10692 && GET_CODE (x) == ASHIFT
10693 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10694 && GET_CODE (XEXP (x, 0)) == SUBREG
10695 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10696 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10697 && (op == EQ || op == NE
10698 || op == GEU || op == GTU || op == LTU || op == LEU)
10699 && GET_CODE (y) == CONST_INT)
10702 /* A construct for a conditional compare, if the false arm contains
10703 0, then both conditions must be true, otherwise either condition
10704 must be true. Not all conditions are possible, so CCmode is
10705 returned if it can't be done. */
10706 if (GET_CODE (x) == IF_THEN_ELSE
10707 && (XEXP (x, 2) == const0_rtx
10708 || XEXP (x, 2) == const1_rtx)
10709 && COMPARISON_P (XEXP (x, 0))
10710 && COMPARISON_P (XEXP (x, 1)))
10711 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10712 INTVAL (XEXP (x, 2)));
10714 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10715 if (GET_CODE (x) == AND
10716 && (op == EQ || op == NE)
10717 && COMPARISON_P (XEXP (x, 0))
10718 && COMPARISON_P (XEXP (x, 1)))
10719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10722 if (GET_CODE (x) == IOR
10723 && (op == EQ || op == NE)
10724 && COMPARISON_P (XEXP (x, 0))
10725 && COMPARISON_P (XEXP (x, 1)))
10726 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10729 /* An operation (on Thumb) where we want to test for a single bit.
10730 This is done by shifting that bit up into the top bit of a
10731 scratch register; we can then branch on the sign bit. */
10733 && GET_MODE (x) == SImode
10734 && (op == EQ || op == NE)
10735 && GET_CODE (x) == ZERO_EXTRACT
10736 && XEXP (x, 1) == const1_rtx)
10739 /* For an operation that sets the condition codes as a side-effect, the
10740 V flag is not set correctly, so we can only use comparisons where
10741 this doesn't matter. (For LT and GE we can use "mi" and "pl" signs.) */
10743 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10744 if (GET_MODE (x) == SImode
10746 && (op == EQ || op == NE || op == LT || op == GE)
10747 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10748 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10749 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10750 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10751 || GET_CODE (x) == LSHIFTRT
10752 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10753 || GET_CODE (x) == ROTATERT
10754 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10755 return CC_NOOVmode;
10757 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10760 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10761 && GET_CODE (x) == PLUS
10762 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10765 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10767 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
10769 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10776 /* A DImode comparison against zero can be implemented by
10777 or'ing the two halves together. */
10778 if (y == const0_rtx)
10781 /* We can do an equality test in three Thumb instructions. */
10791 /* DImode unsigned comparisons can be implemented by cmp +
10792 cmpeq without a scratch register. Not worth doing in Thumb-2. */
10803 /* DImode signed and unsigned comparisons can be implemented
10804 by cmp + sbcs with a scratch register, but that does not
10805 set the Z flag; we must reverse GT/LE/GTU/LEU. */
10806 gcc_assert (op != EQ && op != NE);
10810 gcc_unreachable ();
10817 /* X and Y are two things to compare using CODE. Emit the compare insn and
10818 return the rtx for register 0 in the proper mode. FP means this is a
10819 floating point compare: I don't think that it is needed on the arm. */
10821 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10823 enum machine_mode mode;
10825 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10827 /* We might have X as a constant, Y as a register because of the predicates
10828 used for cmpdi. If so, force X to a register here. */
10829 if (dimode_comparison && !REG_P (x))
10830 x = force_reg (DImode, x);
10832 mode = SELECT_CC_MODE (code, x, y);
10833 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10835 if (dimode_comparison
10836 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10837 && mode != CC_CZmode)
10841 /* To compare two non-zero values for equality, XOR them and
10842 then compare against zero. Not used for ARM mode; there
10843 CC_CZmode is cheaper. */
10844 if (mode == CC_Zmode && y != const0_rtx)
10846 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10849 /* A scratch register is required. */
10850 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10851 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10852 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10855 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10860 /* Generate a sequence of insns that will generate the correct return
10861 address mask depending on the physical architecture that the program is running on. */
10864 arm_gen_return_addr_mask (void)
10866 rtx reg = gen_reg_rtx (Pmode);
10868 emit_insn (gen_return_addr_mask (reg));
10873 arm_reload_in_hi (rtx *operands)
10875 rtx ref = operands[1];
10877 HOST_WIDE_INT offset = 0;
10879 if (GET_CODE (ref) == SUBREG)
10881 offset = SUBREG_BYTE (ref);
10882 ref = SUBREG_REG (ref);
10885 if (GET_CODE (ref) == REG)
10887 /* We have a pseudo which has been spilt onto the stack; there
10888 are two cases here: the first where there is a simple
10889 stack-slot replacement and a second where the stack-slot is
10890 out of range, or is used as a subreg. */
10891 if (reg_equiv_mem (REGNO (ref)))
10893 ref = reg_equiv_mem (REGNO (ref));
10894 base = find_replacement (&XEXP (ref, 0));
10897 /* The slot is out of range, or was dressed up in a SUBREG. */
10898 base = reg_equiv_address (REGNO (ref));
10901 base = find_replacement (&XEXP (ref, 0));
10903 /* Handle the case where the address is too complex to be offset by 1. */
10904 if (GET_CODE (base) == MINUS
10905 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10907 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10909 emit_set_insn (base_plus, base);
10912 else if (GET_CODE (base) == PLUS)
10914 /* The addend must be CONST_INT, or we would have dealt with it above. */
10915 HOST_WIDE_INT hi, lo;
10917 offset += INTVAL (XEXP (base, 1));
10918 base = XEXP (base, 0);
10920 /* Rework the address into a legal sequence of insns. */
10921 /* Valid range for lo is -4095 -> 4095 */
10924 : -((-offset) & 0xfff));
10926 /* Corner case: if lo is the max offset then we would be out of range
10927 once we have added the additional 1 below, so bump the msb into the
10928 pre-loading insn(s). */
10932 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10933 ^ (HOST_WIDE_INT) 0x80000000)
10934 - (HOST_WIDE_INT) 0x80000000);
10936 gcc_assert (hi + lo == offset);
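/* For example, offset 0x1001 splits into lo = 1, hi = 0x1000, and
   offset -0x1001 into lo = -1, hi = -0x1000; in each case lo fits
   the 12-bit immediate field and hi + lo reconstructs the original
   offset. */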
10940 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10942 /* Get the base address; addsi3 knows how to handle constants
10943 that require more than one insn. */
10944 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10950 /* Operands[2] may overlap operands[0] (though it won't overlap
10951 operands[1]); that's why we asked for a DImode reg -- so we can
10952 use the half that does not overlap. */
10953 if (REGNO (operands[2]) == REGNO (operands[0]))
10954 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10956 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10958 emit_insn (gen_zero_extendqisi2 (scratch,
10959 gen_rtx_MEM (QImode,
10960 plus_constant (base,
10962 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10963 gen_rtx_MEM (QImode,
10964 plus_constant (base,
10966 if (!BYTES_BIG_ENDIAN)
10967 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10968 gen_rtx_IOR (SImode,
10971 gen_rtx_SUBREG (SImode, operands[0], 0),
10975 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10976 gen_rtx_IOR (SImode,
10977 gen_rtx_ASHIFT (SImode, scratch,
10979 gen_rtx_SUBREG (SImode, operands[0], 0)));
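/* On a little-endian target the two byte loads and the IOR above
   amount to something like:
   ldrb scratch, [base] @ low byte
   ldrb out, [base, #1] @ high byte
   orr out, scratch, out, lsl #8
   where "out" stands for the SImode subreg of operands[0]. */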
10982 /* Handle storing a half-word to memory during reload by synthesizing it as two
10983 byte stores. Take care not to clobber the input values until after we
10984 have moved them somewhere safe. This code assumes that if the DImode
10985 scratch in operands[2] overlaps either the input value or output address
10986 in some way, then that value must die in this insn (we absolutely need
10987 two scratch registers for some corner cases). */
10989 arm_reload_out_hi (rtx *operands)
10991 rtx ref = operands[0];
10992 rtx outval = operands[1];
10994 HOST_WIDE_INT offset = 0;
10996 if (GET_CODE (ref) == SUBREG)
10998 offset = SUBREG_BYTE (ref);
10999 ref = SUBREG_REG (ref);
11002 if (GET_CODE (ref) == REG)
11004 /* We have a pseudo which has been spilt onto the stack; there
11005 are two cases here: the first where there is a simple
11006 stack-slot replacement and a second where the stack-slot is
11007 out of range, or is used as a subreg. */
11008 if (reg_equiv_mem (REGNO (ref)))
11010 ref = reg_equiv_mem (REGNO (ref));
11011 base = find_replacement (&XEXP (ref, 0));
11014 /* The slot is out of range, or was dressed up in a SUBREG. */
11015 base = reg_equiv_address (REGNO (ref));
11018 base = find_replacement (&XEXP (ref, 0));
11020 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11022 /* Handle the case where the address is too complex to be offset by 1. */
11023 if (GET_CODE (base) == MINUS
11024 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11026 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11028 /* Be careful not to destroy OUTVAL. */
11029 if (reg_overlap_mentioned_p (base_plus, outval))
11031 /* Updating base_plus might destroy outval, see if we can
11032 swap the scratch and base_plus. */
11033 if (!reg_overlap_mentioned_p (scratch, outval))
11036 scratch = base_plus;
11041 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11043 /* Be conservative and copy OUTVAL into the scratch now;
11044 this should only be necessary if outval is a subreg
11045 of something larger than a word. */
11046 /* XXX Might this clobber base? I can't see how it can,
11047 since scratch is known to overlap with OUTVAL, and
11048 must be wider than a word. */
11049 emit_insn (gen_movhi (scratch_hi, outval));
11050 outval = scratch_hi;
11054 emit_set_insn (base_plus, base);
11057 else if (GET_CODE (base) == PLUS)
11059 /* The addend must be CONST_INT, or we would have dealt with it above. */
11060 HOST_WIDE_INT hi, lo;
11062 offset += INTVAL (XEXP (base, 1));
11063 base = XEXP (base, 0);
11065 /* Rework the address into a legal sequence of insns. */
11066 /* Valid range for lo is -4095 -> 4095 */
11069 : -((-offset) & 0xfff));
11071 /* Corner case: if lo is the max offset then we would be out of range
11072 once we have added the additional 1 below, so bump the msb into the
11073 pre-loading insn(s). */
11077 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11078 ^ (HOST_WIDE_INT) 0x80000000)
11079 - (HOST_WIDE_INT) 0x80000000);
11081 gcc_assert (hi + lo == offset);
11085 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11087 /* Be careful not to destroy OUTVAL. */
11088 if (reg_overlap_mentioned_p (base_plus, outval))
11090 /* Updating base_plus might destroy outval, see if we
11091 can swap the scratch and base_plus. */
11092 if (!reg_overlap_mentioned_p (scratch, outval))
11095 scratch = base_plus;
11100 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11102 /* Be conservative and copy outval into scratch now,
11103 this should only be necessary if outval is a
11104 subreg of something larger than a word. */
11105 /* XXX Might this clobber base? I can't see how it
	     can, since scratch is known to overlap with
	     OUTVAL, and must be wider than a word.  */
11108 emit_insn (gen_movhi (scratch_hi, outval));
11109 outval = scratch_hi;
11113 /* Get the base address; addsi3 knows how to handle constants
11114 that require more than one insn. */
11115 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11121 if (BYTES_BIG_ENDIAN)
11123 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11124 plus_constant (base, offset + 1)),
11125 gen_lowpart (QImode, outval)));
11126 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
11129 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11130 gen_lowpart (QImode, scratch)));
11134 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11135 gen_lowpart (QImode, outval)));
11136 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
11139 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11140 plus_constant (base, offset + 1)),
11141 gen_lowpart (QImode, scratch)));
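/* Illustrative sketch (an assumption, not taken from the original source):
   on a little-endian target the sequence above stores the halfword OUTVAL
   as, in effect,

	strb	outval, [base, #offset]
	mov	scratch, outval, lsr #8
	strb	scratch, [base, #offset + 1]

   with the two byte addresses swapped on a big-endian target.  */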
11145 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11146 (padded to the size of a word) should be passed in a register. */
11149 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11151 if (TARGET_AAPCS_BASED)
11152 return must_pass_in_stack_var_size (mode, type);
11154 return must_pass_in_stack_var_size_or_pad (mode, type);
11158 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11159 Return true if an argument passed on the stack should be padded upwards,
11160 i.e. if the least-significant byte has useful data.
11161 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11162 aggregate types are placed in the lowest memory address. */
11165 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11167 if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
11170 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11177 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11178 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11179 byte of the register has useful data, and return the opposite if the
11180 most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */
11185 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11186 tree type, int first ATTRIBUTE_UNUSED)
11188 if (TARGET_AAPCS_BASED
11189 && BYTES_BIG_ENDIAN
11190 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11191 && int_size_in_bytes (type) <= 4)
11194 /* Otherwise, use default padding. */
11195 return !BYTES_BIG_ENDIAN;
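/* Illustrative example (derived from the checks above, not from the
   original source): on a big-endian AAPCS target, a 3-byte aggregate
   passed in a core register matches the test above and is padded
   upward, i.e. its data occupies the least significant bytes of the
   register; under the default rule the same big-endian target would
   instead pad downward.  */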
11199 /* Print a symbolic form of X to the debug file, F. */
11201 arm_print_value (FILE *f, rtx x)
11203 switch (GET_CODE (x))
11206 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11210 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11218 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11220 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11221 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11229 fprintf (f, "\"%s\"", XSTR (x, 0));
11233 fprintf (f, "`%s'", XSTR (x, 0));
11237 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11241 arm_print_value (f, XEXP (x, 0));
11245 arm_print_value (f, XEXP (x, 0));
11247 arm_print_value (f, XEXP (x, 1));
11255 fprintf (f, "????");
11260 /* Routines for manipulation of the constant pool. */
/* ARM instructions cannot load a large constant directly into a
   register; it has to come from a pc-relative load.  The constant
   must therefore be placed in the addressable range of the
   pc-relative load.  Depending on the precise pc-relative load
   instruction, the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.
11270 It is important to minimize this, since the branches will slow
11271 things down and make the code larger.
11273 Normally we can hide the table after an existing unconditional
11274 branch so that there is no interruption of the flow, but in the
11275 worst case the code looks like this:
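	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   (An illustrative reconstruction of the worst case: each constant
   load needs its own pool entry and a branch around it.)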
11293 We fix this by performing a scan after scheduling, which notices
11294 which instructions need to have their operands fetched from the
11295 constant table and builds the table.
11297 The algorithm starts by building a table of all the constants that
11298 need fixing up and all the natural barriers in the function (places
11299 where a constant table can be dropped without breaking the flow).
11300 For each fixup we note how far the pc-relative replacement will be
11301 able to reach and the offset of the instruction into the function.
11303 Having built the table we then group the fixes together to form
11304 tables that are as large as possible (subject to addressing
11305 constraints) and emit each table of constants after the last
11306 barrier that is within range of all the instructions in the group.
11307 If a group does not contain a barrier, then we forcibly create one
11308 by inserting a jump instruction into the flow. Once the table has
11309 been inserted, the insns are then modified to reference the
11310 relevant entry in the pool.
11312 Possible enhancements to the algorithm (not implemented) are:
11314 1) For some processors and object formats, there may be benefit in
11315 aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a minipool.  */
11319 /* These typedefs are located at the start of this file, so that
11320 they can be used in the prototypes there. This comment is to
11321 remind readers of that fact so that the following structures
11322 can be understood more easily.
11324 typedef struct minipool_node Mnode;
11325 typedef struct minipool_fixup Mfix; */
11327 struct minipool_node
11329 /* Doubly linked chain of entries. */
11332 /* The maximum offset into the code that this entry can be placed. While
11333 pushing fixes for forward references, all entries are sorted in order
11334 of increasing max_address. */
11335 HOST_WIDE_INT max_address;
11336 /* Similarly for an entry inserted for a backwards ref. */
11337 HOST_WIDE_INT min_address;
11338 /* The number of fixes referencing this entry. This can become zero
11339 if we "unpush" an entry. In this case we ignore the entry when we
11340 come to emit the code. */
11342 /* The offset from the start of the minipool. */
11343 HOST_WIDE_INT offset;
11344 /* The value in table. */
11346 /* The mode of value. */
11347 enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled,
     sizes > 4 also imply an alignment of 8 bytes.  */
11353 struct minipool_fixup
11357 HOST_WIDE_INT address;
11359 enum machine_mode mode;
11363 HOST_WIDE_INT forwards;
11364 HOST_WIDE_INT backwards;
11367 /* Fixes less than a word need padding out to a word boundary. */
11368 #define MINIPOOL_FIX_SIZE(mode) \
11369 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
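/* For example (illustrative): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while
   MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */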
11371 static Mnode * minipool_vector_head;
11372 static Mnode * minipool_vector_tail;
11373 static rtx minipool_vector_label;
11374 static int minipool_pad;
11376 /* The linked list of all minipool fixes required for this function. */
11377 Mfix * minipool_fix_head;
11378 Mfix * minipool_fix_tail;
11379 /* The fix entry for the current minipool, once it has been placed. */
11380 Mfix * minipool_barrier;
11382 /* Determines if INSN is the start of a jump table. Returns the end
11383 of the TABLE or NULL_RTX. */
11385 is_jump_table (rtx insn)
11389 if (GET_CODE (insn) == JUMP_INSN
11390 && JUMP_LABEL (insn) != NULL
11391 && ((table = next_real_insn (JUMP_LABEL (insn)))
11392 == next_real_insn (insn))
11394 && GET_CODE (table) == JUMP_INSN
11395 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11396 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11402 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11403 #define JUMP_TABLES_IN_TEXT_SECTION 0
11406 static HOST_WIDE_INT
11407 get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
11411 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11413 rtx body = PATTERN (insn);
11414 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11415 HOST_WIDE_INT size;
11416 HOST_WIDE_INT modesize;
11418 modesize = GET_MODE_SIZE (GET_MODE (body));
11419 size = modesize * XVECLEN (body, elt);
11423 /* Round up size of TBB table to a halfword boundary. */
11424 size = (size + 1) & ~(HOST_WIDE_INT)1;
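	  /* E.g. a TBB table with five byte-sized entries (size == 5)
	     is rounded up to 6 bytes here (illustrative).  */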
11427 /* No padding necessary for TBH. */
11430 /* Add two bytes for alignment on Thumb. */
11435 gcc_unreachable ();
11443 /* Move a minipool fix MP from its current location to before MAX_MP.
11444 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11445 constraints may need updating. */
11447 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11448 HOST_WIDE_INT max_address)
11450 /* The code below assumes these are different. */
11451 gcc_assert (mp != max_mp);
11453 if (max_mp == NULL)
11455 if (max_address < mp->max_address)
11456 mp->max_address = max_address;
11460 if (max_address > max_mp->max_address - mp->fix_size)
11461 mp->max_address = max_mp->max_address - mp->fix_size;
11463 mp->max_address = max_address;
11465 /* Unlink MP from its current position. Since max_mp is non-null,
11466 mp->prev must be non-null. */
11467 mp->prev->next = mp->next;
11468 if (mp->next != NULL)
11469 mp->next->prev = mp->prev;
11471 minipool_vector_tail = mp->prev;
11473 /* Re-insert it before MAX_MP. */
11475 mp->prev = max_mp->prev;
11478 if (mp->prev != NULL)
11479 mp->prev->next = mp;
11481 minipool_vector_head = mp;
11484 /* Save the new entry. */
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
11489 while (mp->prev != NULL
11490 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11492 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11499 /* Add a constant to the minipool for a forward reference. Returns the
11500 node added or NULL if the constant will not fit in this pool. */
11502 add_minipool_forward_ref (Mfix *fix)
11504 /* If set, max_mp is the first pool_entry that has a lower
11505 constraint than the one we are trying to add. */
11506 Mnode * max_mp = NULL;
11507 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
	  >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;
11519 /* Scan the pool to see if a constant with the same value has
11520 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
11523 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11525 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11526 && fix->mode == mp->mode
11527 && (GET_CODE (fix->value) != CODE_LABEL
11528 || (CODE_LABEL_NUMBER (fix->value)
11529 == CODE_LABEL_NUMBER (mp->value)))
11530 && rtx_equal_p (fix->value, mp->value))
11532 /* More than one fix references this entry. */
11534 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11537 /* Note the insertion point if necessary. */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	  /* If we are inserting an 8-byte aligned quantity and
11543 we have not already found an insertion point, then
11544 make sure that all such 8-byte aligned quantities are
11545 placed at the start of the pool. */
11546 if (ARM_DOUBLEWORD_ALIGN
11548 && fix->fix_size >= 8
11549 && mp->fix_size < 8)
11552 max_address = mp->max_address;
11556 /* The value is not currently in the minipool, so we need to create
11557 a new entry for it. If MAX_MP is NULL, the entry will be put on
11558 the end of the list since the placement is less constrained than
11559 any existing entry. Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
11563 mp->fix_size = fix->fix_size;
11564 mp->mode = fix->mode;
11565 mp->value = fix->value;
11567 /* Not yet required for a backwards ref. */
11568 mp->min_address = -65536;
11570 if (max_mp == NULL)
11572 mp->max_address = max_address;
11574 mp->prev = minipool_vector_tail;
11576 if (mp->prev == NULL)
11578 minipool_vector_head = mp;
11579 minipool_vector_label = gen_label_rtx ();
11582 mp->prev->next = mp;
11584 minipool_vector_tail = mp;
11588 if (max_address > max_mp->max_address - mp->fix_size)
11589 mp->max_address = max_mp->max_address - mp->fix_size;
11591 mp->max_address = max_address;
11594 mp->prev = max_mp->prev;
11596 if (mp->prev != NULL)
11597 mp->prev->next = mp;
11599 minipool_vector_head = mp;
11602 /* Save the new entry. */
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
11607 while (mp->prev != NULL
11608 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11610 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11618 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11619 HOST_WIDE_INT min_address)
11621 HOST_WIDE_INT offset;
11623 /* The code below assumes these are different. */
11624 gcc_assert (mp != min_mp);
11626 if (min_mp == NULL)
11628 if (min_address > mp->min_address)
11629 mp->min_address = min_address;
11633 /* We will adjust this below if it is too loose. */
11634 mp->min_address = min_address;
11636 /* Unlink MP from its current position. Since min_mp is non-null,
11637 mp->next must be non-null. */
11638 mp->next->prev = mp->prev;
11639 if (mp->prev != NULL)
11640 mp->prev->next = mp->next;
11642 minipool_vector_head = mp->next;
11644 /* Reinsert it after MIN_MP. */
11646 mp->next = min_mp->next;
11648 if (mp->next != NULL)
11649 mp->next->prev = mp;
11651 minipool_vector_tail = mp;
11657 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11659 mp->offset = offset;
11660 if (mp->refcount > 0)
11661 offset += mp->fix_size;
11663 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11664 mp->next->min_address = mp->min_address + mp->fix_size;
11670 /* Add a constant to the minipool for a backward reference. Returns the
11671 node added or NULL if the constant will not fit in this pool.
11673 Note that the code for insertion for a backwards reference can be
11674 somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
11678 add_minipool_backward_ref (Mfix *fix)
11680 /* If set, min_mp is the last pool_entry that has a lower constraint
11681 than the one we are trying to add. */
11682 Mnode *min_mp = NULL;
11683 /* This can be negative, since it is only a constraint. */
11684 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11687 /* If we can't reach the current pool from this insn, or if we can't
11688 insert this entry at the end of the pool without pushing other
11689 fixes out of range, then we don't try. This ensures that we
11690 can't fail later on. */
11691 if (min_address >= minipool_barrier->address
11692 || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;
11696 /* Scan the pool to see if a constant with the same value has
11697 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
11700 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11702 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11703 && fix->mode == mp->mode
11704 && (GET_CODE (fix->value) != CODE_LABEL
11705 || (CODE_LABEL_NUMBER (fix->value)
11706 == CODE_LABEL_NUMBER (mp->value)))
11707 && rtx_equal_p (fix->value, mp->value)
11708 /* Check that there is enough slack to move this entry to the
11709 end of the table (this is conservative). */
11710 && (mp->max_address
11711 > (minipool_barrier->address
11712 + minipool_vector_tail->offset
11713 + minipool_vector_tail->fix_size)))
11716 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11719 if (min_mp != NULL)
11720 mp->min_address += fix->fix_size;
11723 /* Note the insertion point if necessary. */
11724 if (mp->min_address < min_address)
11726 /* For now, we do not allow the insertion of 8-byte alignment
11727 requiring nodes anywhere but at the start of the pool. */
11728 if (ARM_DOUBLEWORD_ALIGN
11729 && fix->fix_size >= 8 && mp->fix_size < 8)
11734 else if (mp->max_address
11735 < minipool_barrier->address + mp->offset + fix->fix_size)
11737 /* Inserting before this entry would push the fix beyond
11738 its maximum address (which can happen if we have
	     re-located a forwards fix); force the new fix to come
	     after it.  */
11741 if (ARM_DOUBLEWORD_ALIGN
11742 && fix->fix_size >= 8 && mp->fix_size < 8)
11747 min_address = mp->min_address + fix->fix_size;
11750 /* Do not insert a non-8-byte aligned quantity before 8-byte
11751 aligned quantities. */
11752 else if (ARM_DOUBLEWORD_ALIGN
11753 && fix->fix_size < 8
11754 && mp->fix_size >= 8)
11757 min_address = mp->min_address + fix->fix_size;
11762 /* We need to create a new entry. */
11764 mp->fix_size = fix->fix_size;
11765 mp->mode = fix->mode;
11766 mp->value = fix->value;
11768 mp->max_address = minipool_barrier->address + 65536;
11770 mp->min_address = min_address;
11772 if (min_mp == NULL)
11775 mp->next = minipool_vector_head;
11777 if (mp->next == NULL)
11779 minipool_vector_tail = mp;
11780 minipool_vector_label = gen_label_rtx ();
11783 mp->next->prev = mp;
11785 minipool_vector_head = mp;
11789 mp->next = min_mp->next;
11793 if (mp->next != NULL)
11794 mp->next->prev = mp;
11796 minipool_vector_tail = mp;
11799 /* Save the new entry. */
11807 /* Scan over the following entries and adjust their offsets. */
11808 while (mp->next != NULL)
11810 if (mp->next->min_address < mp->min_address + mp->fix_size)
11811 mp->next->min_address = mp->min_address + mp->fix_size;
11814 mp->next->offset = mp->offset + mp->fix_size;
11816 mp->next->offset = mp->offset;
11825 assign_minipool_offsets (Mfix *barrier)
11827 HOST_WIDE_INT offset = 0;
11830 minipool_barrier = barrier;
11832 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11834 mp->offset = offset;
11836 if (mp->refcount > 0)
11837 offset += mp->fix_size;
/* Output the literal table.  */
11843 dump_minipool (rtx scan)
11849 if (ARM_DOUBLEWORD_ALIGN)
11850 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11851 if (mp->refcount > 0 && mp->fix_size >= 8)
11858 fprintf (dump_file,
11859 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11860 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11862 scan = emit_label_after (gen_label_rtx (), scan);
11863 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11864 scan = emit_label_after (minipool_vector_label, scan);
11866 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11868 if (mp->refcount > 0)
11872 fprintf (dump_file,
11873 ";; Offset %u, min %ld, max %ld ",
11874 (unsigned) mp->offset, (unsigned long) mp->min_address,
11875 (unsigned long) mp->max_address);
11876 arm_print_value (dump_file, mp->value);
11877 fputc ('\n', dump_file);
11880 switch (mp->fix_size)
11882 #ifdef HAVE_consttable_1
11884 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11888 #ifdef HAVE_consttable_2
11890 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11894 #ifdef HAVE_consttable_4
11896 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11900 #ifdef HAVE_consttable_8
11902 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11906 #ifdef HAVE_consttable_16
11908 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11913 gcc_unreachable ();
11921 minipool_vector_head = minipool_vector_tail = NULL;
11922 scan = emit_insn_after (gen_consttable_end (), scan);
11923 scan = emit_barrier_after (scan);
11926 /* Return the cost of forcibly inserting a barrier after INSN. */
11928 arm_barrier_cost (rtx insn)
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
11933 int base_cost = 50;
11934 rtx next = next_nonnote_insn (insn);
11936 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11939 switch (GET_CODE (insn))
      /* It will always be better to place the table before the label,
	 rather than after it.  */
11951 return base_cost - 10;
11954 return base_cost + 10;
11958 /* Find the best place in the insn stream in the range
11959 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
11963 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11965 HOST_WIDE_INT count = 0;
11967 rtx from = fix->insn;
11968 /* The instruction after which we will insert the jump. */
11969 rtx selected = NULL;
11971 /* The address at which the jump instruction will be placed. */
11972 HOST_WIDE_INT selected_address;
11974 HOST_WIDE_INT max_count = max_address - fix->address;
11975 rtx label = gen_label_rtx ();
11977 selected_cost = arm_barrier_cost (from);
11978 selected_address = fix->address;
11980 while (from && count < max_count)
      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
11987 gcc_assert (GET_CODE (from) != BARRIER);
11989 /* Count the length of this insn. */
11990 count += get_attr_length (from);
11992 /* If there is a jump table, add its length. */
11993 tmp = is_jump_table (from);
11996 count += get_jump_table_size (tmp);
11998 /* Jump tables aren't in a basic block, so base the cost on
11999 the dispatch insn. If we select this location, we will
12000 still put the pool after the table. */
12001 new_cost = arm_barrier_cost (from);
12003 if (count < max_count
12004 && (!selected || new_cost <= selected_cost))
12007 selected_cost = new_cost;
12008 selected_address = fix->address + count;
12011 /* Continue after the dispatch table. */
12012 from = NEXT_INSN (tmp);
12016 new_cost = arm_barrier_cost (from);
12018 if (count < max_count
12019 && (!selected || new_cost <= selected_cost))
12022 selected_cost = new_cost;
12023 selected_address = fix->address + count;
12026 from = NEXT_INSN (from);
12029 /* Make sure that we found a place to insert the jump. */
12030 gcc_assert (selected);
12032 /* Make sure we do not split a call and its corresponding
12033 CALL_ARG_LOCATION note. */
12034 if (CALL_P (selected))
12036 rtx next = NEXT_INSN (selected);
12037 if (next && NOTE_P (next)
12038 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12042 /* Create a new JUMP_INSN that branches around a barrier. */
12043 from = emit_jump_insn_after (gen_jump (label), selected);
12044 JUMP_LABEL (from) = label;
12045 barrier = emit_barrier_after (from);
12046 emit_label_after (label, barrier);
12048 /* Create a minipool barrier entry for the new barrier. */
12049 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12050 new_fix->insn = barrier;
12051 new_fix->address = selected_address;
12052 new_fix->next = fix->next;
12053 fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
12061 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12063 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12066 fix->address = address;
12069 if (minipool_fix_head != NULL)
12070 minipool_fix_tail->next = fix;
12072 minipool_fix_head = fix;
12074 minipool_fix_tail = fix;
12077 /* Record INSN, which will need fixing up to load a value from the
12078 minipool. ADDRESS is the offset of the insn since the start of the
12079 function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
12083 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12084 enum machine_mode mode, rtx value)
12086 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12089 fix->address = address;
12092 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12093 fix->value = value;
12094 fix->forwards = get_attr_pool_range (insn);
12095 fix->backwards = get_attr_neg_pool_range (insn);
12096 fix->minipool = NULL;
12098 /* If an insn doesn't have a range defined for it, then it isn't
12099 expecting to be reworked by this code. Better to stop now than
12100 to generate duff assembly code. */
12101 gcc_assert (fix->forwards || fix->backwards);
12103 /* If an entry requires 8-byte alignment then assume all constant pools
12104 require 4 bytes of padding. Trying to do this later on a per-pool
12105 basis is awkward because existing pool entries have to be modified. */
12106 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12111 fprintf (dump_file,
12112 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12113 GET_MODE_NAME (mode),
12114 INSN_UID (insn), (unsigned long) address,
12115 -1 * (long)fix->backwards, (long)fix->forwards);
12116 arm_print_value (dump_file, fix->value);
12117 fprintf (dump_file, "\n");
12120 /* Add it to the chain of fixes. */
12123 if (minipool_fix_head != NULL)
12124 minipool_fix_tail->next = fix;
12126 minipool_fix_head = fix;
12128 minipool_fix_tail = fix;
12131 /* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   generate it.  */
12135 arm_const_double_inline_cost (rtx val)
12137 rtx lowpart, highpart;
12138 enum machine_mode mode;
12140 mode = GET_MODE (val);
12142 if (mode == VOIDmode)
12145 gcc_assert (GET_MODE_SIZE (mode) == 8);
12147 lowpart = gen_lowpart (SImode, val);
12148 highpart = gen_highpart_mode (SImode, mode, val);
12150 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12151 gcc_assert (GET_CODE (highpart) == CONST_INT);
12153 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12154 NULL_RTX, NULL_RTX, 0, 0)
12155 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12156 NULL_RTX, NULL_RTX, 0, 0));
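/* A worked example (illustrative): for the 64-bit constant
   0x0000000100000001 each 32-bit half is the single valid immediate
   #1, so the sum above would be 1 + 1 = 2 insns.  The sketch below is
   an assumption about how such a cost could gate inline synthesis; it
   is not part of the original source.  */
#if 0
static bool
use_inline_synthesis_p (rtx val)
{
  /* Two data-processing insns are assumed cheaper than a literal-pool
     load plus the pool entry itself.  */
  return arm_const_double_inline_cost (val) <= 2;
}
#endif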
12159 /* Return true if it is worthwhile to split a 64-bit constant into two
12160 32-bit operations. This is the case if optimizing for size, or
12161 if we have load delay slots, or if one 32-bit part can be done with
12162 a single data operation. */
12164 arm_const_double_by_parts (rtx val)
12166 enum machine_mode mode = GET_MODE (val);
12169 if (optimize_size || arm_ld_sched)
12172 if (mode == VOIDmode)
12175 part = gen_highpart_mode (SImode, mode, val);
12177 gcc_assert (GET_CODE (part) == CONST_INT);
12179 if (const_ok_for_arm (INTVAL (part))
12180 || const_ok_for_arm (~INTVAL (part)))
12183 part = gen_lowpart (SImode, val);
12185 gcc_assert (GET_CODE (part) == CONST_INT);
12187 if (const_ok_for_arm (INTVAL (part))
12188 || const_ok_for_arm (~INTVAL (part)))
12194 /* Return true if it is possible to inline both the high and low parts
12195 of a 64-bit constant into 32-bit data processing instructions. */
12197 arm_const_double_by_immediates (rtx val)
12199 enum machine_mode mode = GET_MODE (val);
12202 if (mode == VOIDmode)
12205 part = gen_highpart_mode (SImode, mode, val);
12207 gcc_assert (GET_CODE (part) == CONST_INT);
12209 if (!const_ok_for_arm (INTVAL (part)))
12212 part = gen_lowpart (SImode, val);
12214 gcc_assert (GET_CODE (part) == CONST_INT);
12216 if (!const_ok_for_arm (INTVAL (part)))
12222 /* Scan INSN and note any of its operands that need fixing.
12223 If DO_PUSHES is false we do not actually push any of the fixups
12224 needed. The function returns TRUE if any fixups were needed/pushed.
12225 This is used by arm_memory_load_p() which needs to know about loads
12226 of constants that will be converted into minipool loads. */
12228 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12230 bool result = false;
12233 extract_insn (insn);
12235 if (!constrain_operands (1))
12236 fatal_insn_not_found (insn);
12238 if (recog_data.n_alternatives == 0)
  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
12243 preprocess_constraints ();
12245 for (opno = 0; opno < recog_data.n_operands; opno++)
12247 /* Things we need to fix can only occur in inputs. */
12248 if (recog_data.operand_type[opno] != OP_IN)
12251 /* If this alternative is a memory reference, then any mention
12252 of constants in this alternative is really to fool reload
12253 into allowing us to accept one there. We need to fix them up
12254 now so that we output the right code. */
12255 if (recog_op_alt[opno][which_alternative].memory_ok)
12257 rtx op = recog_data.operand[opno];
12259 if (CONSTANT_P (op))
12262 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12263 recog_data.operand_mode[opno], op);
12266 else if (GET_CODE (op) == MEM
12267 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12268 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12272 rtx cop = avoid_constant_pool_reference (op);
12274 /* Casting the address of something to a mode narrower
12275 than a word can cause avoid_constant_pool_reference()
12276 to return the pool reference itself. That's no good to
		 us here.  Let's just hope that we can use the
		 constant pool value directly.  */
12280 cop = get_pool_constant (XEXP (op, 0));
12282 push_minipool_fix (insn, address,
12283 recog_data.operand_loc[opno],
12284 recog_data.operand_mode[opno], cop);
12295 /* Convert instructions to their cc-clobbering variant if possible, since
12296 that allows us to use smaller encodings. */
12299 thumb2_reorg (void)
12304 INIT_REG_SET (&live);
12306 /* We are freeing block_for_insn in the toplev to keep compatibility
12307 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12308 compute_bb_for_insn ();
12315 COPY_REG_SET (&live, DF_LR_OUT (bb));
12316 df_simulate_initialize_backwards (bb, &live);
12317 FOR_BB_INSNS_REVERSE (bb, insn)
12319 if (NONJUMP_INSN_P (insn)
12320 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12322 rtx pat = PATTERN (insn);
12323 if (GET_CODE (pat) == SET
12324 && low_register_operand (XEXP (pat, 0), SImode)
12325 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12326 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12327 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12329 rtx dst = XEXP (pat, 0);
12330 rtx src = XEXP (pat, 1);
12331 rtx op0 = XEXP (src, 0);
12332 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12333 ? XEXP (src, 1) : NULL);
12335 if (rtx_equal_p (dst, op0)
12336 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12338 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12339 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12340 rtvec vec = gen_rtvec (2, pat, clobber);
12342 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12343 INSN_CODE (insn) = -1;
12345 /* We can also handle a commutative operation where the
12346 second operand matches the destination. */
12347 else if (op1 && rtx_equal_p (dst, op1))
12349 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12350 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12353 src = copy_rtx (src);
12354 XEXP (src, 0) = op1;
12355 XEXP (src, 1) = op0;
12356 pat = gen_rtx_SET (VOIDmode, dst, src);
12357 vec = gen_rtvec (2, pat, clobber);
12358 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12359 INSN_CODE (insn) = -1;
12364 if (NONDEBUG_INSN_P (insn))
12365 df_simulate_one_insn_backwards (bb, insn, &live);
12369 CLEAR_REG_SET (&live);
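/* Illustrative example (not from the original source): a 32-bit
   Thumb-2 instruction such as

	and	r0, r0, r1

   gains a CC clobber here and can then be assembled as the 16-bit
   flag-setting encoding

	ands	r0, r0, r1

   because the condition flags were shown to be dead at that point.  */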
/* GCC puts the pool in the wrong place for ARM, since we can only
12373 load addresses a limited distance around the pc. We do some
12374 special munging to move the constant pool values to the correct
12375 point in the code. */
12380 HOST_WIDE_INT address = 0;
12386 minipool_fix_head = minipool_fix_tail = NULL;
12388 /* The first insn must always be a note, or the code below won't
12389 scan it properly. */
12390 insn = get_insns ();
12391 gcc_assert (GET_CODE (insn) == NOTE);
12394 /* Scan all the insns and record the operands that will need fixing. */
12395 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12397 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12398 && (arm_cirrus_insn_p (insn)
12399 || GET_CODE (insn) == JUMP_INSN
12400 || arm_memory_load_p (insn)))
12401 cirrus_reorg (insn);
12403 if (GET_CODE (insn) == BARRIER)
12404 push_minipool_barrier (insn, address);
12405 else if (INSN_P (insn))
12409 note_invalid_constants (insn, address, true);
12410 address += get_attr_length (insn);
12412 /* If the insn is a vector jump, add the size of the table
12413 and skip the table. */
12414 if ((table = is_jump_table (insn)) != NULL)
12416 address += get_jump_table_size (table);
12422 fix = minipool_fix_head;
12424 /* Now scan the fixups and perform the required changes. */
12429 Mfix * last_added_fix;
12430 Mfix * last_barrier = NULL;
12433 /* Skip any further barriers before the next fix. */
12434 while (fix && GET_CODE (fix->insn) == BARRIER)
12437 /* No more fixes. */
12441 last_added_fix = NULL;
12443 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12445 if (GET_CODE (ftmp->insn) == BARRIER)
12447 if (ftmp->address >= minipool_vector_head->max_address)
12450 last_barrier = ftmp;
12452 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12455 last_added_fix = ftmp; /* Keep track of the last fix added. */
12458 /* If we found a barrier, drop back to that; any fixes that we
12459 could have reached but come after the barrier will now go in
12460 the next mini-pool. */
12461 if (last_barrier != NULL)
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
12465 for (fdel = last_barrier->next;
12466 fdel && fdel != ftmp;
12469 fdel->minipool->refcount--;
12470 fdel->minipool = NULL;
12473 ftmp = last_barrier;
	  /* ftmp is the first fix that we can't fit into this pool, and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
12481 HOST_WIDE_INT max_address;
12483 /* The last item on the list of fixes must be a barrier, so
12484 we can never run off the end of the list of fixes without
12485 last_barrier being set. */
12488 max_address = minipool_vector_head->max_address;
12489 /* Check that there isn't another fix that is in range that
12490 we couldn't fit into this pool because the pool was
12491 already too large: we need to put the pool before such an
12492 instruction. The pool itself may come just after the
12493 fix because create_fix_barrier also allows space for a
12494 jump instruction. */
12495 if (ftmp->address < max_address)
12496 max_address = ftmp->address + 1;
12498 last_barrier = create_fix_barrier (last_added_fix, max_address);
12501 assign_minipool_offsets (last_barrier);
12505 if (GET_CODE (ftmp->insn) != BARRIER
12506 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12513 /* Scan over the fixes we have identified for this pool, fixing them
12514 up and adding the constants to the pool itself. */
12515 for (this_fix = fix; this_fix && ftmp != this_fix;
12516 this_fix = this_fix->next)
12517 if (GET_CODE (this_fix->insn) != BARRIER)
12520 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12521 minipool_vector_label),
12522 this_fix->minipool->offset);
12523 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12526 dump_minipool (last_barrier->insn);
12530 /* From now on we must synthesize any constants that we can't handle
12531 directly. This can happen if the RTL gets split during final
12532 instruction generation. */
12533 after_arm_reorg = 1;
12535 /* Free the minipool memory. */
12536 obstack_free (&minipool_obstack, minipool_startobj);
12539 /* Routines to output assembly language. */
12541 /* If the rtx is the correct value then return the string of the number.
12542 In this way we can ensure that valid double constants are generated even
12543 when cross compiling. */
12545 fp_immediate_constant (rtx x)
12550 if (!fp_consts_inited)
12553 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12554 for (i = 0; i < 8; i++)
12555 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12556 return strings_fp[i];
12558 gcc_unreachable ();
12561 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12562 static const char *
12563 fp_const_from_val (REAL_VALUE_TYPE *r)
12567 if (!fp_consts_inited)
12570 for (i = 0; i < 8; i++)
12571 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12572 return strings_fp[i];
12574 gcc_unreachable ();
12577 /* Output the operands of a LDM/STM instruction to STREAM.
12578 MASK is the ARM register set mask of which only bits 0-15 are important.
12579 REG is the base register, either the frame pointer or the stack pointer,
12580 INSTR is the possibly suffixed load or store instruction.
12581 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12584 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12585 unsigned long mask, int rfe)
12588 bool not_first = FALSE;
12590 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12591 fputc ('\t', stream);
12592 asm_fprintf (stream, instr, reg);
12593 fputc ('{', stream);
12595 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12596 if (mask & (1 << i))
12599 fprintf (stream, ", ");
12601 asm_fprintf (stream, "%r", i);
12606 fprintf (stream, "}^\n");
12608 fprintf (stream, "}\n");
12612 /* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
12614 REG and COUNT specify the register range.
12615 Extra registers may be added to avoid hardware bugs.
12617 We output FLDMD even for ARMv5 VFP implementations. Although
12618 FLDMD is technically not supported until ARMv6, it is believed
12619 that all VFP implementations support its use in this context. */
12622 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12626 /* Workaround ARM10 VFPr1 bug. */
12627 if (count == 2 && !arm_arch6)
12634 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12635 load into multiple parts if we have to handle more than 16 registers. */
12638 vfp_output_fldmd (stream, base, reg, 16);
12639 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12643 fputc ('\t', stream);
12644 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12646 for (i = reg; i < reg + count; i++)
12649 fputs (", ", stream);
12650 asm_fprintf (stream, "d%d", i);
12652 fputs ("}\n", stream);
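/* For example (illustrative), vfp_output_fldmd (stream, SP_REGNUM, 8, 3)
   would emit:

	fldmfdd	sp!, {d8, d9, d10}  */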
12657 /* Output the assembly for a store multiple. */
12660 vfp_output_fstmd (rtx * operands)
12667 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12668 p = strlen (pattern);
12670 gcc_assert (GET_CODE (operands[1]) == REG);
12672 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12673 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12675 p += sprintf (&pattern[p], ", d%d", base + i);
12677 strcpy (&pattern[p], "}");
12679 output_asm_insn (pattern, operands);
12684 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12685 number of bytes pushed. */
12688 vfp_emit_fstmd (int base_reg, int count)
12695 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12696 register pairs are stored by a store multiple insn. We avoid this
12697 by pushing an extra pair. */
12698 if (count == 2 && !arm_arch6)
12700 if (base_reg == LAST_VFP_REGNUM - 3)
12705 /* FSTMD may not store more than 16 doubleword registers at once. Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
12713 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12714 saved += vfp_emit_fstmd (base_reg, 16);
12718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12721 reg = gen_rtx_REG (DFmode, base_reg);
12724 XVECEXP (par, 0, 0)
12725 = gen_rtx_SET (VOIDmode,
12728 gen_rtx_PRE_MODIFY (Pmode,
12731 (stack_pointer_rtx,
12734 gen_rtx_UNSPEC (BLKmode,
12735 gen_rtvec (1, reg),
12736 UNSPEC_PUSH_MULT));
12738 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12739 plus_constant (stack_pointer_rtx, -(count * 8)));
12740 RTX_FRAME_RELATED_P (tmp) = 1;
12741 XVECEXP (dwarf, 0, 0) = tmp;
12743 tmp = gen_rtx_SET (VOIDmode,
12744 gen_frame_mem (DFmode, stack_pointer_rtx),
12746 RTX_FRAME_RELATED_P (tmp) = 1;
12747 XVECEXP (dwarf, 0, 1) = tmp;
12749 for (i = 1; i < count; i++)
12751 reg = gen_rtx_REG (DFmode, base_reg);
12753 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12755 tmp = gen_rtx_SET (VOIDmode,
12756 gen_frame_mem (DFmode,
12757 plus_constant (stack_pointer_rtx,
12760 RTX_FRAME_RELATED_P (tmp) = 1;
12761 XVECEXP (dwarf, 0, i + 1) = tmp;
12764 par = emit_insn (par);
12765 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12766 RTX_FRAME_RELATED_P (par) = 1;
12771 /* Emit a call instruction with pattern PAT. ADDR is the address of
12772 the call target. */
12775 arm_emit_call_insn (rtx pat, rtx addr)
12779 insn = emit_call_insn (pat);
12781 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12782 If the call might use such an entry, add a use of the PIC register
12783 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12784 if (TARGET_VXWORKS_RTP
12786 && GET_CODE (addr) == SYMBOL_REF
12787 && (SYMBOL_REF_DECL (addr)
12788 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12789 : !SYMBOL_REF_LOCAL_P (addr)))
12791 require_pic_register ();
12792 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12796 /* Output a 'call' insn. */
12798 output_call (rtx *operands)
12800 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12802 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12803 if (REGNO (operands[0]) == LR_REGNUM)
12805 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12806 output_asm_insn ("mov%?\t%0, %|lr", operands);
12809 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12811 if (TARGET_INTERWORK || arm_arch4t)
12812 output_asm_insn ("bx%?\t%0", operands);
12814 output_asm_insn ("mov%?\t%|pc, %0", operands);
12819 /* Output a 'call' insn that is a reference in memory. This is
12820 disabled for ARMv5 and we prefer a blx instead because otherwise
12821 there's a significant performance overhead. */
12823 output_call_mem (rtx *operands)
12825 gcc_assert (!arm_arch5);
12826 if (TARGET_INTERWORK)
12828 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12829 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12830 output_asm_insn ("bx%?\t%|ip", operands);
12832 else if (regno_use_in (LR_REGNUM, operands[0]))
12834 /* LR is used in the memory address. We load the address in the
12835 first instruction. It's safe to use IP as the target of the
12836 load since the call will kill it anyway. */
12837 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12838 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12840 output_asm_insn ("bx%?\t%|ip", operands);
12842 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12846 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12847 output_asm_insn ("ldr%?\t%|pc, %0", operands);
/* Output a move from ARM registers to an FPA register.
   OPERANDS[0] is an FPA register.
   OPERANDS[1] is the first register of an ARM register pair.  */
12858 output_mov_long_double_fpa_from_arm (rtx *operands)
12860 int arm_reg0 = REGNO (operands[1]);
12863 gcc_assert (arm_reg0 != IP_REGNUM);
12865 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12866 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12867 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12869 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12870 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
/* Output a move from an FPA register to ARM registers.
   OPERANDS[0] is the first register of an ARM register pair.
   OPERANDS[1] is an FPA register.  */
12879 output_mov_long_double_arm_from_fpa (rtx *operands)
12881 int arm_reg0 = REGNO (operands[0]);
12884 gcc_assert (arm_reg0 != IP_REGNUM);
12886 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12887 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12888 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12890 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12891 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
/* Output a move of a long double from ARM registers to ARM registers.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
12899 output_mov_long_double_arm_from_arm (rtx *operands)
12901 /* We have to be careful here because the two might overlap. */
12902 int dest_start = REGNO (operands[0]);
12903 int src_start = REGNO (operands[1]);
12907 if (dest_start < src_start)
12909 for (i = 0; i < 3; i++)
12911 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12912 ops[1] = gen_rtx_REG (SImode, src_start + i);
12913 output_asm_insn ("mov%?\t%0, %1", ops);
12918 for (i = 2; i >= 0; i--)
12920 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12921 ops[1] = gen_rtx_REG (SImode, src_start + i);
12922 output_asm_insn ("mov%?\t%0, %1", ops);
12930 arm_emit_movpair (rtx dest, rtx src)
12932 /* If the src is an immediate, simplify it. */
12933 if (CONST_INT_P (src))
12935 HOST_WIDE_INT val = INTVAL (src);
12936 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12937 if ((val >> 16) & 0x0000ffff)
12938 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12940 GEN_INT ((val >> 16) & 0x0000ffff));
12943 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12944 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
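/* Illustrative example (an assumption about the emitted assembly):
   for dest = r0 and a constant source of 0x12345678, the CONST_INT
   path above emits the equivalent of

	movw	r0, #0x5678
	movt	r0, #0x1234

   and the movt is skipped when the high half of the constant is
   zero.  */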
/* Output a move from ARM registers to an FPA register.
   OPERANDS[0] is an FPA register.
   OPERANDS[1] is the first register of an ARM register pair.  */
12951 output_mov_double_fpa_from_arm (rtx *operands)
12953 int arm_reg0 = REGNO (operands[1]);
12956 gcc_assert (arm_reg0 != IP_REGNUM);
12958 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12959 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12960 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12961 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
/* Output a move from an FPA register to ARM registers.
   OPERANDS[0] is the first register of an ARM register pair.
   OPERANDS[1] is an FPA register.  */
12969 output_mov_double_arm_from_fpa (rtx *operands)
12971 int arm_reg0 = REGNO (operands[0]);
12974 gcc_assert (arm_reg0 != IP_REGNUM);
12976 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12977 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12978 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12979 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
12986 output_move_double (rtx *operands)
12988 enum rtx_code code0 = GET_CODE (operands[0]);
12989 enum rtx_code code1 = GET_CODE (operands[1]);
12994 unsigned int reg0 = REGNO (operands[0]);
12996 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12998 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13000 switch (GET_CODE (XEXP (operands[1], 0)))
	      && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
13005 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13007 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13011 gcc_assert (TARGET_LDRD);
13012 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13017 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13019 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13024 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13026 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13030 gcc_assert (TARGET_LDRD);
13031 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  /* Auto-increment addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
13040 otherops[0] = operands[0];
13041 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13042 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13044 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13046 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13048 /* Registers overlap so split out the increment. */
13049 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13050 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13054 /* Use a single insn if we can.
13055 FIXME: IWMMXT allows offsets larger than ldrd can
13056 handle, fix these up with a pair of ldr. */
13058 || GET_CODE (otherops[2]) != CONST_INT
13059 || (INTVAL (otherops[2]) > -256
13060 && INTVAL (otherops[2]) < 256))
13061 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13064 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13065 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13071 /* Use a single insn if we can.
13072 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13073 fix these up with a pair of ldr. */
13075 || GET_CODE (otherops[2]) != CONST_INT
13076 || (INTVAL (otherops[2]) > -256
13077 && INTVAL (otherops[2]) < 256))
13078 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13089 /* We might be able to use ldrd %0, %1 here. However the range is
13090 different to ldr/adr, and it is broken on some ARMv7-M
13091 implementations. */
	  /* Use the second register of the pair to avoid problematic
	     overlap.  */
13094 otherops[1] = operands[1];
13095 output_asm_insn ("adr%?\t%0, %1", otherops);
13096 operands[1] = otherops[0];
13098 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13100 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13103 /* ??? This needs checking for thumb2. */
13105 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13106 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13108 otherops[0] = operands[0];
13109 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13110 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13112 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13114 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13116 switch ((int) INTVAL (otherops[2]))
13119 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13124 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13129 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13133 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13134 operands[1] = otherops[0];
13136 && (GET_CODE (otherops[2]) == REG
13138 || (GET_CODE (otherops[2]) == CONST_INT
13139 && INTVAL (otherops[2]) > -256
13140 && INTVAL (otherops[2]) < 256)))
13142 if (reg_overlap_mentioned_p (operands[0],
13146 /* Swap base and index registers over to
13147 avoid a conflict. */
13149 otherops[1] = otherops[2];
13152 /* If both registers conflict, it will usually
13153 have been fixed by a splitter. */
13154 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13155 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13157 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13158 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13162 otherops[0] = operands[0];
13163 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13168 if (GET_CODE (otherops[2]) == CONST_INT)
13170 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13171 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13173 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13176 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13179 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13182 return "ldr%(d%)\t%0, [%1]";
13184 return "ldm%(ia%)\t%1, %M0";
13188 otherops[1] = adjust_address (operands[1], SImode, 4);
13189 /* Take care of overlapping base/data reg. */
13190 if (reg_mentioned_p (operands[0], operands[1]))
13192 output_asm_insn ("ldr%?\t%0, %1", otherops);
13193 output_asm_insn ("ldr%?\t%0, %1", operands);
13197 output_asm_insn ("ldr%?\t%0, %1", operands);
13198 output_asm_insn ("ldr%?\t%0, %1", otherops);
13205 /* Constraints should ensure this. */
13206 gcc_assert (code0 == MEM && code1 == REG);
13207 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13209 switch (GET_CODE (XEXP (operands[0], 0)))
13213 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13215 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13219 gcc_assert (TARGET_LDRD);
13220 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13225 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13227 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13232 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13234 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13238 gcc_assert (TARGET_LDRD);
13239 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13244 otherops[0] = operands[1];
13245 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13246 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13248 /* IWMMXT allows offsets larger than ldrd can handle,
13249 fix these up with a pair of ldr. */
13251 && GET_CODE (otherops[2]) == CONST_INT
13252 && (INTVAL(otherops[2]) <= -256
13253 || INTVAL(otherops[2]) >= 256))
13255 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13257 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13258 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13262 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13263 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13266 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13267 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13269 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13273 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13274 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13276 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13279 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13285 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13291 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13296 && (GET_CODE (otherops[2]) == REG
13298 || (GET_CODE (otherops[2]) == CONST_INT
13299 && INTVAL (otherops[2]) > -256
13300 && INTVAL (otherops[2]) < 256)))
13302 otherops[0] = operands[1];
13303 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13304 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13310 otherops[0] = adjust_address (operands[0], SImode, 4);
13311 otherops[1] = operands[1];
13312 output_asm_insn ("str%?\t%1, %0", operands);
13313 output_asm_insn ("str%?\t%H1, %0", otherops);
13320 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13321 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13324 output_move_quad (rtx *operands)
13326 if (REG_P (operands[0]))
13328 /* Load, or reg->reg move. */
13330 if (MEM_P (operands[1]))
13332 switch (GET_CODE (XEXP (operands[1], 0)))
13335 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13340 output_asm_insn ("adr%?\t%0, %1", operands);
13341 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13345 gcc_unreachable ();
13353 gcc_assert (REG_P (operands[1]));
13355 dest = REGNO (operands[0]);
13356 src = REGNO (operands[1]);
      /* This seems pretty dumb, but hopefully GCC won't try to do it
	 very often.  */
13361 for (i = 0; i < 4; i++)
13363 ops[0] = gen_rtx_REG (SImode, dest + i);
13364 ops[1] = gen_rtx_REG (SImode, src + i);
13365 output_asm_insn ("mov%?\t%0, %1", ops);
13368 for (i = 3; i >= 0; i--)
13370 ops[0] = gen_rtx_REG (SImode, dest + i);
13371 ops[1] = gen_rtx_REG (SImode, src + i);
13372 output_asm_insn ("mov%?\t%0, %1", ops);
13378 gcc_assert (MEM_P (operands[0]));
13379 gcc_assert (REG_P (operands[1]));
13380 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13382 switch (GET_CODE (XEXP (operands[0], 0)))
13385 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13389 gcc_unreachable ();
13396 /* Output a VFP load or store instruction. */
13399 output_move_vfp (rtx *operands)
13401 rtx reg, mem, addr, ops[2];
13402 int load = REG_P (operands[0]);
13403 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13404 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13407 enum machine_mode mode;
13409 reg = operands[!load];
13410 mem = operands[load];
13412 mode = GET_MODE (reg);
13414 gcc_assert (REG_P (reg));
13415 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13416 gcc_assert (mode == SFmode
13420 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13421 gcc_assert (MEM_P (mem));
13423 addr = XEXP (mem, 0);
13425 switch (GET_CODE (addr))
13428 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13429 ops[0] = XEXP (addr, 0);
13434 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13435 ops[0] = XEXP (addr, 0);
13440 templ = "f%s%c%%?\t%%%s0, %%1%s";
13446 sprintf (buff, templ,
13447 load ? "ld" : "st",
13450 integer_p ? "\t%@ int" : "");
13451 output_asm_insn (buff, ops);
13456 /* Output a Neon quad-word load or store, or a load or store for
13457 larger structure modes.
13459 WARNING: The ordering of elements is weird in big-endian mode,
13460 because we use VSTM, as required by the EABI. GCC RTL defines
element ordering based on in-memory order.  This can differ
13462 from the architectural ordering of elements within a NEON register.
13463 The intrinsics defined in arm_neon.h use the NEON register element
13464 ordering, not the GCC RTL element ordering.
For example, the in-memory ordering of a big-endian quadword
13467 vector with 16-bit elements when stored from register pair {d0,d1}
13468 will be (lowest address first, d0[N] is NEON register element N):
13470 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13472 When necessary, quadword registers (dN, dN+1) are moved to ARM
13473 registers from rN in the order:
13475 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13477 So that STM/LDM can be used on vectors in ARM registers, and the
13478 same memory layout will result as if VSTM/VLDM were used. */
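/* For instance (a sketch only; the registers are assumed), storing the
   quad-word register q4 ({d8, d9}) through a POST_INC pointer held in
   r0 produces

	vstmia	r0!, {d8-d9}

   whereas a PLUS or LABEL_REF address falls back to one vstr/vldr per
   doubleword, as handled in the default case below.  */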
13481 output_move_neon (rtx *operands)
13483 rtx reg, mem, addr, ops[2];
13484 int regno, load = REG_P (operands[0]);
13487 enum machine_mode mode;
13489 reg = operands[!load];
13490 mem = operands[load];
13492 mode = GET_MODE (reg);
13494 gcc_assert (REG_P (reg));
13495 regno = REGNO (reg);
13496 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13497 || NEON_REGNO_OK_FOR_QUAD (regno));
13498 gcc_assert (VALID_NEON_DREG_MODE (mode)
13499 || VALID_NEON_QREG_MODE (mode)
13500 || VALID_NEON_STRUCT_MODE (mode));
13501 gcc_assert (MEM_P (mem));
13503 addr = XEXP (mem, 0);
13505 /* Strip off const from addresses like (const (plus (...))). */
13506 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13507 addr = XEXP (addr, 0);
13509 switch (GET_CODE (addr))
13512 templ = "v%smia%%?\t%%0!, %%h1";
13513 ops[0] = XEXP (addr, 0);
13518 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13519 templ = "v%smdb%%?\t%%0!, %%h1";
13520 ops[0] = XEXP (addr, 0);
13525 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13526 gcc_unreachable ();
13531 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13534 for (i = 0; i < nregs; i++)
13536 /* We're only using DImode here because it's a convenient size. */
13537 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13538 ops[1] = adjust_address (mem, DImode, 8 * i);
13539 if (reg_overlap_mentioned_p (ops[0], mem))
13541 gcc_assert (overlap == -1);
13546 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13547 output_asm_insn (buff, ops);
13552 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13553 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13554 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13555 output_asm_insn (buff, ops);
13562 templ = "v%smia%%?\t%%m0, %%h1";
13567 sprintf (buff, templ, load ? "ld" : "st");
13568 output_asm_insn (buff, ops);
13573 /* Compute and return the length of neon_mov<mode>, where <mode> is
13574 one of VSTRUCT modes: EI, OI, CI or XI. */
13576 arm_attr_length_move_neon (rtx insn)
13578 rtx reg, mem, addr;
13580 enum machine_mode mode;
13582 extract_insn_cached (insn);
13584 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13586 mode = GET_MODE (recog_data.operand[0]);
13597 gcc_unreachable ();
13601 load = REG_P (recog_data.operand[0]);
13602 reg = recog_data.operand[!load];
13603 mem = recog_data.operand[load];
13605 gcc_assert (MEM_P (mem));
13607 mode = GET_MODE (reg);
13608 addr = XEXP (mem, 0);
13610 /* Strip off const from addresses like (const (plus (...))). */
13611 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13612 addr = XEXP (addr, 0);
13614 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13616 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
13627 arm_address_offset_is_imm (rtx insn)
13631 extract_insn_cached (insn);
13633 if (REG_P (recog_data.operand[0]))
13636 mem = recog_data.operand[0];
13638 gcc_assert (MEM_P (mem));
13640 addr = XEXP (mem, 0);
13642 if (GET_CODE (addr) == REG
13643 || (GET_CODE (addr) == PLUS
13644 && GET_CODE (XEXP (addr, 0)) == REG
13645 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13651 /* Output an ADD r, s, #n where n may be too big for one instruction.
13652 If adding zero to one register, output nothing. */
13654 output_add_immediate (rtx *operands)
13656 HOST_WIDE_INT n = INTVAL (operands[2]);
13658 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13661 output_multi_immediate (operands,
13662 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13665 output_multi_immediate (operands,
13666 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13673 /* Output a multiple immediate operation.
13674 OPERANDS is the vector of operands referred to in the output patterns.
13675 INSTR1 is the output pattern to use for the first constant.
13676 INSTR2 is the output pattern to use for subsequent constants.
13677 IMMED_OP is the index of the constant slot in OPERANDS.
13678 N is the constant value. */
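/* A worked example (constant chosen for illustration): adding 0x10003
   via output_add_immediate splits into two immediates that each fit
   the ARM 8-bit-rotated encoding:

	add	r0, r1, #3
	add	r0, r0, #65536
*/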
13679 static const char *
13680 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13681 int immed_op, HOST_WIDE_INT n)
13683 #if HOST_BITS_PER_WIDE_INT > 32
13689 /* Quick and easy output. */
13690 operands[immed_op] = const0_rtx;
13691 output_asm_insn (instr1, operands);
13696 const char * instr = instr1;
13698 /* Note that n is never zero here (which would give no output). */
13699 for (i = 0; i < 32; i += 2)
13703 operands[immed_op] = GEN_INT (n & (255 << i));
13704 output_asm_insn (instr, operands);
13714 /* Return the name of a shifter operation. */
13715 static const char *
arm_shift_nmem (enum rtx_code code)
13721 return ARM_LSL_NAME;
13737 /* Return the appropriate ARM instruction for the operation code.
13738 The returned result should not be overwritten. OP is the rtx of the
operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
13742 arithmetic_instr (rtx op, int shift_first_arg)
13744 switch (GET_CODE (op))
13750 return shift_first_arg ? "rsb" : "sub";
      return arm_shift_nmem (GET_CODE (op));
13768 gcc_unreachable ();
13772 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13773 for the operation code. The returned result should not be overwritten.
13774 OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant shift amount otherwise.  */
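/* For example, (ashiftrt (reg) (const_int 3)) yields "asr" with
   *AMOUNTP == 3, while (mult (reg) (const_int 8)) is printed as the
   equivalent shift, "lsl" with *AMOUNTP == 3 (see the MULT case
   below).  */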
13777 static const char *
13778 shift_op (rtx op, HOST_WIDE_INT *amountp)
13781 enum rtx_code code = GET_CODE (op);
13783 switch (GET_CODE (XEXP (op, 1)))
13791 *amountp = INTVAL (XEXP (op, 1));
13795 gcc_unreachable ();
13801 gcc_assert (*amountp != -1);
13802 *amountp = 32 - *amountp;
13805 /* Fall through. */
      mnem = arm_shift_nmem (code);
13815 /* We never have to worry about the amount being other than a
13816 power of 2, since this case can never be reloaded from a reg. */
13817 gcc_assert (*amountp != -1);
13818 *amountp = int_log2 (*amountp);
13819 return ARM_LSL_NAME;
13822 gcc_unreachable ();
13825 if (*amountp != -1)
13827 /* This is not 100% correct, but follows from the desire to merge
13828 multiplication by a power of 2 with the recognizer for a
	 shift.  >=32 is not a valid shift for "lsl", so we must try to
13830 output a shift that produces the correct arithmetical result.
13831 Using lsr #32 is identical except for the fact that the carry bit
13832 is not set correctly if we set the flags; but we never use the
13833 carry bit from such an operation, so we can ignore that. */
13834 if (code == ROTATERT)
13835 /* Rotate is just modulo 32. */
13837 else if (*amountp != (*amountp & 31))
13839 if (code == ASHIFT)
13844 /* Shifts of 0 are no-ops. */
13852 /* Obtain the shift from the POWER of two. */
13854 static HOST_WIDE_INT
13855 int_log2 (HOST_WIDE_INT power)
13857 HOST_WIDE_INT shift = 0;
13859 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13861 gcc_assert (shift <= 31);
13868 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13869 because /bin/as is horribly restrictive. The judgement about
13870 whether or not each character is 'printable' (and can be output as
13871 is) or not (and must be printed with an octal escape) must be made
13872 with reference to the *host* character set -- the situation is
13873 similar to that discussed in the comments above pp_c_char in
13874 c-pretty-print.c. */
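/* For example (a hypothetical call, purely for illustration), emitting
   the four bytes h, i, '"', '\n' would produce:

	.ascii	"hi\"\012"

   the quote being backslash-escaped and the newline printed as an
   octal escape.  */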
13876 #define MAX_ASCII_LEN 51
13879 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13882 int len_so_far = 0;
13884 fputs ("\t.ascii\t\"", stream);
13886 for (i = 0; i < len; i++)
13890 if (len_so_far >= MAX_ASCII_LEN)
13892 fputs ("\"\n\t.ascii\t\"", stream);
13898 if (c == '\\' || c == '\"')
13900 putc ('\\', stream);
13908 fprintf (stream, "\\%03o", c);
13913 fputs ("\"\n", stream);
13916 /* Compute the register save mask for registers 0 through 12
13917 inclusive. This code is used by arm_compute_save_reg_mask. */
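/* As a sketch: for an ordinary (non-interrupt, returning) function
   whose body uses only the call-saved registers r4 and r7, the result
   is (1 << 4) | (1 << 7) == 0x90, with the frame-pointer and PIC bits
   ORed in when those apply.  */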
13919 static unsigned long
13920 arm_compute_save_reg0_reg12_mask (void)
13922 unsigned long func_type = arm_current_func_type ();
13923 unsigned long save_reg_mask = 0;
13926 if (IS_INTERRUPT (func_type))
13928 unsigned int max_reg;
13929 /* Interrupt functions must not corrupt any registers,
13930 even call clobbered ones. If this is a leaf function
13931 we can just examine the registers used by the RTL, but
13932 otherwise we have to assume that whatever function is
13933 called might clobber anything, and so we have to save
13934 all the call-clobbered registers as well. */
13935 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13936 /* FIQ handlers have registers r8 - r12 banked, so
	 we only need to check r0 - r7.  Normal ISRs only
13938 bank r14 and r15, so we must check up to r12.
13939 r13 is the stack pointer which is always preserved,
13940 so we do not need to consider it here. */
13945 for (reg = 0; reg <= max_reg; reg++)
13946 if (df_regs_ever_live_p (reg)
13947 || (! current_function_is_leaf && call_used_regs[reg]))
13948 save_reg_mask |= (1 << reg);
13950 /* Also save the pic base register if necessary. */
13952 && !TARGET_SINGLE_PIC_BASE
13953 && arm_pic_register != INVALID_REGNUM
13954 && crtl->uses_pic_offset_table)
13955 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
  else if (IS_VOLATILE (func_type))
13959 /* For noreturn functions we historically omitted register saves
13960 altogether. However this really messes up debugging. As a
13961 compromise save just the frame pointers. Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
13964 if (frame_pointer_needed)
13965 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13966 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13967 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13968 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13969 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13973 /* In the normal case we only need to save those registers
13974 which are call saved and which are used by this function. */
13975 for (reg = 0; reg <= 11; reg++)
13976 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13977 save_reg_mask |= (1 << reg);
13979 /* Handle the frame pointer as a special case. */
13980 if (frame_pointer_needed)
13981 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13983 /* If we aren't loading the PIC register,
13984 don't stack it even though it may be live. */
13986 && !TARGET_SINGLE_PIC_BASE
13987 && arm_pic_register != INVALID_REGNUM
13988 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13989 || crtl->uses_pic_offset_table))
13990 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13992 /* The prologue will copy SP into R0, so save it. */
13993 if (IS_STACKALIGN (func_type))
13994 save_reg_mask |= 1;
13997 /* Save registers so the exception handler can modify them. */
13998 if (crtl->calls_eh_return)
14004 reg = EH_RETURN_DATA_REGNO (i);
14005 if (reg == INVALID_REGNUM)
14007 save_reg_mask |= 1 << reg;
14011 return save_reg_mask;
14015 /* Compute the number of bytes used to store the static chain register on the
14016 stack, above the stack frame. We need to know this accurately to get the
14017 alignment of the rest of the stack frame correct. */
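/* E.g. an APCS-frame nested ARM-mode function that needs a frame
   pointer, has r3 live and pushes no pretend args reserves 4 bytes
   here; in every other situation the result is 0.  */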
14019 static int arm_compute_static_chain_stack_bytes (void)
14021 unsigned long func_type = arm_current_func_type ();
14022 int static_chain_stack_bytes = 0;
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (func_type)
      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14027 static_chain_stack_bytes = 4;
14029 return static_chain_stack_bytes;
14033 /* Compute a bit mask of which registers need to be
14034 saved on the stack for the current function.
14035 This is used by arm_get_frame_offsets, which may add extra registers. */
14037 static unsigned long
14038 arm_compute_save_reg_mask (void)
14040 unsigned int save_reg_mask = 0;
14041 unsigned long func_type = arm_current_func_type ();
14044 if (IS_NAKED (func_type))
14045 /* This should never really happen. */
14048 /* If we are creating a stack frame, then we must save the frame pointer,
14049 IP (which will hold the old stack pointer), LR and the PC. */
14050 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14052 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14055 | (1 << PC_REGNUM);
14057 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14059 /* Decide if we need to save the link register.
14060 Interrupt routines have their own banked link register,
14061 so they never need to save it.
14062 Otherwise if we do not use the link register we do not need to save
14063 it. If we are pushing other registers onto the stack however, we
14064 can save an instruction in the epilogue by pushing the link register
14065 now and then popping it back into the PC. This incurs extra memory
14066 accesses though, so we only do it when optimizing for size, and only
14067 if we know that we will not need a fancy return sequence. */
14068 if (df_regs_ever_live_p (LR_REGNUM)
14071 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14072 && !crtl->calls_eh_return))
14073 save_reg_mask |= 1 << LR_REGNUM;
14075 if (cfun->machine->lr_save_eliminated)
14076 save_reg_mask &= ~ (1 << LR_REGNUM);
14078 if (TARGET_REALLY_IWMMXT
14079 && ((bit_count (save_reg_mask)
14080 + ARM_NUM_INTS (crtl->args.pretend_args_size +
			 arm_compute_static_chain_stack_bytes ())
14084 /* The total number of registers that are going to be pushed
14085 onto the stack is odd. We need to ensure that the stack
14086 is 64-bit aligned before we start to save iWMMXt registers,
14087 and also before we start to create locals. (A local variable
14088 might be a double or long long which we will load/store using
14089 an iWMMXt instruction). Therefore we need to push another
14090 ARM register, so that the stack will be 64-bit aligned. We
	 try to avoid using the arg registers (r0 - r3) as they might be
14092 used to pass values in a tail call. */
14093 for (reg = 4; reg <= 12; reg++)
14094 if ((save_reg_mask & (1 << reg)) == 0)
14098 save_reg_mask |= (1 << reg);
14101 cfun->machine->sibcall_blocked = 1;
14102 save_reg_mask |= (1 << 3);
14106 /* We may need to push an additional register for use initializing the
14107 PIC base register. */
14108 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14109 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14111 reg = thumb_find_work_register (1 << 4);
14112 if (!call_used_regs[reg])
14113 save_reg_mask |= (1 << reg);
14116 return save_reg_mask;
14120 /* Compute a bit mask of which registers need to be
14121 saved on the stack for the current function. */
14122 static unsigned long
14123 thumb1_compute_save_reg_mask (void)
14125 unsigned long mask;
14129 for (reg = 0; reg < 12; reg ++)
14130 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14134 && !TARGET_SINGLE_PIC_BASE
14135 && arm_pic_register != INVALID_REGNUM
14136 && crtl->uses_pic_offset_table)
14137 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14139 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14140 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14141 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14143 /* LR will also be pushed if any lo regs are pushed. */
14144 if (mask & 0xff || thumb_force_lr_save ())
14145 mask |= (1 << LR_REGNUM);
14147 /* Make sure we have a low work register if we need one.
14148 We will need one if we are going to push a high register,
14149 but we are not currently intending to push a low register. */
14150 if ((mask & 0xff) == 0
14151 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14153 /* Use thumb_find_work_register to choose which register
14154 we will use. If the register is live then we will
14155 have to push it. Use LAST_LO_REGNUM as our fallback
14156 choice for the register to select. */
14157 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14158 /* Make sure the register returned by thumb_find_work_register is
14159 not part of the return value. */
14160 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14161 reg = LAST_LO_REGNUM;
14163 if (! call_used_regs[reg])
14167 /* The 504 below is 8 bytes less than 512 because there are two possible
14168 alignment words. We can't tell here if they will be present or not so we
14169 have to play it safe and assume that they are. */
  if ((CALLER_INTERWORKING_SLOT_SIZE
       + ROUND_UP_WORD (get_frame_size ())
       + crtl->outgoing_args_size) >= 504)
14174 /* This is the same as the code in thumb1_expand_prologue() which
14175 determines which register to use for stack decrement. */
14176 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14177 if (mask & (1 << reg))
14180 if (reg > LAST_LO_REGNUM)
14182 /* Make sure we have a register available for stack decrement. */
14183 mask |= 1 << LAST_LO_REGNUM;
14191 /* Return the number of bytes required to save VFP registers. */
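/* For instance, a contiguous block of three live call-saved D
   registers contributes 3 * 8 == 24 bytes.  On pre-v6 cores a block of
   exactly two registers is widened to four (the ARM10 VFPr1 workaround
   below), so such a block costs 32 bytes rather than 16.  */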
14193 arm_get_vfp_saved_size (void)
14195 unsigned int regno;
14200 /* Space for saved VFP registers. */
14201 if (TARGET_HARD_FLOAT && TARGET_VFP)
14204 for (regno = FIRST_VFP_REGNUM;
14205 regno < LAST_VFP_REGNUM;
14208 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14209 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14213 /* Workaround ARM10 VFPr1 bug. */
14214 if (count == 2 && !arm_arch6)
14216 saved += count * 8;
14225 if (count == 2 && !arm_arch6)
14227 saved += count * 8;
14234 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14235 everything bar the final return instruction. */
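/* Typical sequences emitted here (illustrative only):

	bx	lr			@ leaf function, v4t/v5
	ldmfd	sp!, {r4, pc}		@ r4 and lr were saved
	subs	pc, lr, #4		@ IRQ/FIQ handler return
*/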
14237 output_return_instruction (rtx operand, int really_return, int reverse)
14239 char conditional[10];
14242 unsigned long live_regs_mask;
14243 unsigned long func_type;
14244 arm_stack_offsets *offsets;
14246 func_type = arm_current_func_type ();
14248 if (IS_NAKED (func_type))
14251 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14253 /* If this function was declared non-returning, and we have
14254 found a tail call, then we have to trust that the called
14255 function won't return. */
14260 /* Otherwise, trap an attempted return by aborting. */
14262 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14264 assemble_external_libcall (ops[1]);
14265 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14271 gcc_assert (!cfun->calls_alloca || really_return);
14273 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14275 cfun->machine->return_used_this_function = 1;
14277 offsets = arm_get_frame_offsets ();
14278 live_regs_mask = offsets->saved_regs_mask;
14280 if (live_regs_mask)
14282 const char * return_reg;
14284 /* If we do not have any special requirements for function exit
14285 (e.g. interworking) then we can load the return address
14286 directly into the PC. Otherwise we must load it into LR. */
14288 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14289 return_reg = reg_names[PC_REGNUM];
14291 return_reg = reg_names[LR_REGNUM];
14293 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14295 /* There are three possible reasons for the IP register
14296 being saved. 1) a stack frame was created, in which case
14297 IP contains the old stack pointer, or 2) an ISR routine
14298 corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     ignore it.  */
14301 if (frame_pointer_needed)
14303 live_regs_mask &= ~ (1 << IP_REGNUM);
14304 live_regs_mask |= (1 << SP_REGNUM);
14307 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14310 /* On some ARM architectures it is faster to use LDR rather than
14311 LDM to load a single register. On other architectures, the
14312 cost is the same. In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
14315 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14316 if (live_regs_mask == (1U << reg))
14319 if (reg <= LAST_ARM_REGNUM
14320 && (reg != LR_REGNUM
14322 || ! IS_INTERRUPT (func_type)))
14324 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14325 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14332 /* Generate the load multiple instruction to restore the
14333 registers. Note we can get here, even if
14334 frame_pointer_needed is true, but only if sp already
14335 points to the base of the saved core registers. */
14336 if (live_regs_mask & (1 << SP_REGNUM))
14338 unsigned HOST_WIDE_INT stack_adjust;
14340 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14341 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14343 if (stack_adjust && arm_arch5 && TARGET_ARM)
14344 if (TARGET_UNIFIED_ASM)
14345 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14347 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14350 /* If we can't use ldmib (SA110 bug),
14351 then try to pop r3 instead. */
14353 live_regs_mask |= 1 << 3;
14355 if (TARGET_UNIFIED_ASM)
14356 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14358 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14362 if (TARGET_UNIFIED_ASM)
14363 sprintf (instr, "pop%s\t{", conditional);
14365 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14367 p = instr + strlen (instr);
14369 for (reg = 0; reg <= SP_REGNUM; reg++)
14370 if (live_regs_mask & (1 << reg))
14372 int l = strlen (reg_names[reg]);
14378 memcpy (p, ", ", 2);
14382 memcpy (p, "%|", 2);
14383 memcpy (p + 2, reg_names[reg], l);
14387 if (live_regs_mask & (1 << LR_REGNUM))
14389 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14390 /* If returning from an interrupt, restore the CPSR. */
14391 if (IS_INTERRUPT (func_type))
14398 output_asm_insn (instr, & operand);
14400 /* See if we need to generate an extra instruction to
14401 perform the actual function return. */
14403 && func_type != ARM_FT_INTERWORKED
14404 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14406 /* The return has already been handled
14407 by loading the LR into the PC. */
14414 switch ((int) ARM_FUNC_TYPE (func_type))
14418 /* ??? This is wrong for unified assembly syntax. */
14419 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14422 case ARM_FT_INTERWORKED:
14423 sprintf (instr, "bx%s\t%%|lr", conditional);
14426 case ARM_FT_EXCEPTION:
14427 /* ??? This is wrong for unified assembly syntax. */
14428 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14432 /* Use bx if it's available. */
14433 if (arm_arch5 || arm_arch4t)
14434 sprintf (instr, "bx%s\t%%|lr", conditional);
14436 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14440 output_asm_insn (instr, & operand);
14446 /* Write the function name into the code section, directly preceding
14447 the function prologue.
   Code will be output similar to this:

     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4
14460 When performing a stack backtrace, code can inspect the value
14461 of 'pc' stored at 'fp' + 0. If the trace function then looks
14462 at location pc - 12 and the top 8 bits are set, then we know
14463 that there is a function name embedded immediately preceding this
   location, and that its length is ((pc[-3]) & ~0xff000000).
14466 We assume that pc is declared as a pointer to an unsigned long.
14468 It is of no benefit to output the function name if we are assembling
14469 a leaf function. These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
14473 arm_poke_function_name (FILE *stream, const char *name)
14475 unsigned long alignlength;
14476 unsigned long length;
14479 length = strlen (name) + 1;
14480 alignlength = ROUND_UP_WORD (length);
14482 ASM_OUTPUT_ASCII (stream, name, length);
14483 ASM_OUTPUT_ALIGN (stream, 2);
14484 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14485 assemble_aligned_integer (UNITS_PER_WORD, x);
14488 /* Place some comments into the assembler stream
14489 describing the current function. */
14491 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14493 unsigned long func_type;
14497 thumb1_output_function_prologue (f, frame_size);
14501 /* Sanity check. */
14502 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14504 func_type = arm_current_func_type ();
14506 switch ((int) ARM_FUNC_TYPE (func_type))
14509 case ARM_FT_NORMAL:
14511 case ARM_FT_INTERWORKED:
14512 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14515 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14518 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14520 case ARM_FT_EXCEPTION:
14521 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14525 if (IS_NAKED (func_type))
14526 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14528 if (IS_VOLATILE (func_type))
14529 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14531 if (IS_NESTED (func_type))
14532 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14533 if (IS_STACKALIGN (func_type))
14534 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14536 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14538 crtl->args.pretend_args_size, frame_size);
14540 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14541 frame_pointer_needed,
14542 cfun->machine->uses_anonymous_args);
14544 if (cfun->machine->lr_save_eliminated)
14545 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14547 if (crtl->calls_eh_return)
14548 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14553 arm_output_epilogue (rtx sibling)
14556 unsigned long saved_regs_mask;
14557 unsigned long func_type;
14558 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14559 frame that is $fp + 4 for a non-variadic function. */
14560 int floats_offset = 0;
14562 FILE * f = asm_out_file;
14563 unsigned int lrm_count = 0;
14564 int really_return = (sibling == NULL);
14566 arm_stack_offsets *offsets;
14568 /* If we have already generated the return instruction
14569 then it is futile to generate anything else. */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
14574 func_type = arm_current_func_type ();
14576 if (IS_NAKED (func_type))
14577 /* Naked functions don't have epilogues. */
14580 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14584 /* A volatile function should never return. Call abort. */
14585 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14586 assemble_external_libcall (op);
14587 output_asm_insn ("bl\t%a0", &op);
14592 /* If we are throwing an exception, then we really must be doing a
14593 return, so we can't tail-call. */
14594 gcc_assert (!crtl->calls_eh_return || really_return);
14596 offsets = arm_get_frame_offsets ();
14597 saved_regs_mask = offsets->saved_regs_mask;
14600 lrm_count = bit_count (saved_regs_mask);
14602 floats_offset = offsets->saved_args;
14603 /* Compute how far away the floats will be. */
14604 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14605 if (saved_regs_mask & (1 << reg))
14606 floats_offset += 4;
14608 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14610 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14611 int vfp_offset = offsets->frame;
14613 if (TARGET_FPA_EMU2)
14615 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14616 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14618 floats_offset += 12;
14619 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14620 reg, FP_REGNUM, floats_offset - vfp_offset);
14625 start_reg = LAST_FPA_REGNUM;
14627 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14629 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14631 floats_offset += 12;
14633 /* We can't unstack more than four registers at once. */
14634 if (start_reg - reg == 3)
14636 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14637 reg, FP_REGNUM, floats_offset - vfp_offset);
14638 start_reg = reg - 1;
14643 if (reg != start_reg)
14644 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14645 reg + 1, start_reg - reg,
14646 FP_REGNUM, floats_offset - vfp_offset);
14647 start_reg = reg - 1;
14651 /* Just in case the last register checked also needs unstacking. */
14652 if (reg != start_reg)
14653 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14654 reg + 1, start_reg - reg,
14655 FP_REGNUM, floats_offset - vfp_offset);
14658 if (TARGET_HARD_FLOAT && TARGET_VFP)
14662 /* The fldmd insns do not have base+offset addressing
14663 modes, so we use IP to hold the address. */
14664 saved_size = arm_get_vfp_saved_size ();
14666 if (saved_size > 0)
14668 floats_offset += saved_size;
14669 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14670 FP_REGNUM, floats_offset - vfp_offset);
14672 start_reg = FIRST_VFP_REGNUM;
14673 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14675 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14676 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14678 if (start_reg != reg)
14679 vfp_output_fldmd (f, IP_REGNUM,
14680 (start_reg - FIRST_VFP_REGNUM) / 2,
14681 (reg - start_reg) / 2);
14682 start_reg = reg + 2;
14685 if (start_reg != reg)
14686 vfp_output_fldmd (f, IP_REGNUM,
14687 (start_reg - FIRST_VFP_REGNUM) / 2,
14688 (reg - start_reg) / 2);
14693 /* The frame pointer is guaranteed to be non-double-word aligned.
14694 This is because it is set to (old_stack_pointer - 4) and the
14695 old_stack_pointer was double word aligned. Thus the offset to
14696 the iWMMXt registers to be loaded must also be non-double-word
14697 sized, so that the resultant address *is* double-word aligned.
14698 We can ignore floats_offset since that was already included in
14699 the live_regs_mask. */
14700 lrm_count += (lrm_count % 2 ? 2 : 1);
14702 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14703 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14705 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14706 reg, FP_REGNUM, lrm_count * 4);
14711 /* saved_regs_mask should contain the IP, which at the time of stack
14712 frame generation actually contains the old stack pointer. So a
	 quick way to unwind the stack is just to pop the IP register directly
14714 into the stack pointer. */
14715 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14716 saved_regs_mask &= ~ (1 << IP_REGNUM);
14717 saved_regs_mask |= (1 << SP_REGNUM);
14719 /* There are two registers left in saved_regs_mask - LR and PC. We
14720 only need to restore the LR register (the return address), but to
14721 save time we can load it directly into the PC, unless we need a
14722 special function exit sequence, or we are not really returning. */
14724 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14725 && !crtl->calls_eh_return)
14726 /* Delete the LR from the register mask, so that the LR on
14727 the stack is loaded into the PC in the register mask. */
14728 saved_regs_mask &= ~ (1 << LR_REGNUM);
14730 saved_regs_mask &= ~ (1 << PC_REGNUM);
14732 /* We must use SP as the base register, because SP is one of the
14733 registers being restored. If an interrupt or page fault
14734 happens in the ldm instruction, the SP might or might not
14735 have been restored. That would be bad, as then SP will no
14736 longer indicate the safe area of stack, and we can get stack
14737 corruption. Using SP as the base register means that it will
14738 be reset correctly to the original value, should an interrupt
14739 occur. If the stack pointer already points at the right
14740 place, then omit the subtraction. */
14741 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14742 || cfun->calls_alloca)
14743 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14744 4 * bit_count (saved_regs_mask));
14745 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14747 if (IS_INTERRUPT (func_type))
14748 /* Interrupt handlers will have pushed the
14749 IP onto the stack, so restore it now. */
14750 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14754 /* This branch is executed for ARM mode (non-apcs frames) and
14755 Thumb-2 mode. Frame layout is essentially the same for those
14756 cases, except that in ARM mode frame pointer points to the
14757 first saved register, while in Thumb-2 mode the frame pointer points
14758 to the last saved register.
14760 It is possible to make frame pointer point to last saved
14761 register in both cases, and remove some conditionals below.
14762 That means that fp setup in prologue would be just "mov fp, sp"
14763 and sp restore in epilogue would be just "mov sp, fp", whereas
14764 now we have to use add/sub in those cases. However, the value
14765 of that would be marginal, as both mov and add/sub are 32-bit
14766 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish the ARM-apcs-frame case
	 (where the frame pointer is required to point at the first register)
	 from the ARM-non-apcs-frame case.  Therefore, such a change is
	 postponed until a real need arises.  */
14771 unsigned HOST_WIDE_INT amount;
14773 /* Restore stack pointer if necessary. */
14774 if (TARGET_ARM && frame_pointer_needed)
14776 operands[0] = stack_pointer_rtx;
14777 operands[1] = hard_frame_pointer_rtx;
14779 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14780 output_add_immediate (operands);
14784 if (frame_pointer_needed)
14786 /* For Thumb-2 restore sp from the frame pointer.
	     Operand restrictions mean we have to increment FP, then copy
14789 amount = offsets->locals_base - offsets->saved_regs;
14790 operands[0] = hard_frame_pointer_rtx;
14794 unsigned long count;
14795 operands[0] = stack_pointer_rtx;
14796 amount = offsets->outgoing_args - offsets->saved_regs;
14797 /* pop call clobbered registers if it avoids a
14798 separate stack adjustment. */
14799 count = offsets->saved_regs - offsets->saved_args;
14802 && !crtl->calls_eh_return
	  && bit_count (saved_regs_mask) * 4 == count
14804 && !IS_INTERRUPT (func_type)
14805 && !crtl->tail_call_emit)
14807 unsigned long mask;
14808 /* Preserve return values, of any size. */
	  mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
14811 mask &= ~saved_regs_mask;
14813 while (bit_count (mask) * 4 > amount)
14815 while ((mask & (1 << reg)) == 0)
14817 mask &= ~(1 << reg);
	  if (bit_count (mask) * 4 == amount)
	    {
14821 saved_regs_mask |= mask;
14828 operands[1] = operands[0];
14829 operands[2] = GEN_INT (amount);
14830 output_add_immediate (operands);
14832 if (frame_pointer_needed)
14833 asm_fprintf (f, "\tmov\t%r, %r\n",
14834 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14837 if (TARGET_FPA_EMU2)
14839 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14840 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14841 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14846 start_reg = FIRST_FPA_REGNUM;
14848 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14850 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14852 if (reg - start_reg == 3)
14854 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14855 start_reg, SP_REGNUM);
14856 start_reg = reg + 1;
14861 if (reg != start_reg)
14862 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14863 start_reg, reg - start_reg,
14866 start_reg = reg + 1;
14870 /* Just in case the last register checked also needs unstacking. */
14871 if (reg != start_reg)
14872 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14873 start_reg, reg - start_reg, SP_REGNUM);
14876 if (TARGET_HARD_FLOAT && TARGET_VFP)
14878 int end_reg = LAST_VFP_REGNUM + 1;
14880 /* Scan the registers in reverse order. We need to match
	 any groupings made in the prologue and generate matching
	 pops.  */
14883 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14885 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14886 && (!df_regs_ever_live_p (reg + 1)
14887 || call_used_regs[reg + 1]))
14889 if (end_reg > reg + 2)
14890 vfp_output_fldmd (f, SP_REGNUM,
14891 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14892 (end_reg - (reg + 2)) / 2);
14896 if (end_reg > reg + 2)
14897 vfp_output_fldmd (f, SP_REGNUM, 0,
14898 (end_reg - (reg + 2)) / 2);
14902 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14903 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14904 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14906 /* If we can, restore the LR into the PC. */
14907 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14908 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14909 && !IS_STACKALIGN (func_type)
14911 && crtl->args.pretend_args_size == 0
14912 && saved_regs_mask & (1 << LR_REGNUM)
14913 && !crtl->calls_eh_return)
14915 saved_regs_mask &= ~ (1 << LR_REGNUM);
14916 saved_regs_mask |= (1 << PC_REGNUM);
14917 rfe = IS_INTERRUPT (func_type);
14922 /* Load the registers off the stack. If we only have one register
14923 to load use the LDR instruction - it is faster. For Thumb-2
     always use pop and the assembler will pick the best instruction.  */
14925 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
      && !IS_INTERRUPT (func_type))
14928 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14930 else if (saved_regs_mask)
14932 if (saved_regs_mask & (1 << SP_REGNUM))
14933 /* Note - write back to the stack register is not enabled
14934 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14935 in the list of registers and if we add writeback the
14936 instruction becomes UNPREDICTABLE. */
14937 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14939 else if (TARGET_ARM)
14940 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14943 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14946 if (crtl->args.pretend_args_size)
14948 /* Unwind the pre-pushed regs. */
14949 operands[0] = operands[1] = stack_pointer_rtx;
14950 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14951 output_add_immediate (operands);
14955 /* We may have already restored PC directly from the stack. */
14956 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14959 /* Stack adjustment for exception handler. */
14960 if (crtl->calls_eh_return)
14961 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14962 ARM_EH_STACKADJ_REGNUM);
14964 /* Generate the return instruction. */
14965 switch ((int) ARM_FUNC_TYPE (func_type))
14969 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14972 case ARM_FT_EXCEPTION:
14973 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14976 case ARM_FT_INTERWORKED:
14977 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14981 if (IS_STACKALIGN (func_type))
14983 /* See comment in arm_expand_prologue. */
14984 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14986 if (arm_arch5 || arm_arch4t)
14987 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14989 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14997 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14998 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15000 arm_stack_offsets *offsets;
15006 /* Emit any call-via-reg trampolines that are needed for v4t support
15007 of call_reg and call_value_reg type insns. */
15008 for (regno = 0; regno < LR_REGNUM; regno++)
15010 rtx label = cfun->machine->call_via[regno];
15014 switch_to_section (function_section (current_function_decl));
15015 targetm.asm_out.internal_label (asm_out_file, "L",
15016 CODE_LABEL_NUMBER (label));
15017 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15021 /* ??? Probably not safe to set this here, since it assumes that a
15022 function will be emitted as assembly immediately after we generate
15023 RTL for it. This does not happen for inline functions. */
15024 cfun->machine->return_used_this_function = 0;
15026 else /* TARGET_32BIT */
15028 /* We need to take into account any stack-frame rounding. */
15029 offsets = arm_get_frame_offsets ();
15031 gcc_assert (!use_return_insn (FALSE, NULL)
15032 || (cfun->machine->return_used_this_function != 0)
15033 || offsets->saved_regs == offsets->outgoing_args
15034 || frame_pointer_needed);
15036 /* Reset the ARM-specific per-function variables. */
15037 after_arm_reorg = 0;
15041 /* Generate and emit an insn that we will recognize as a push_multi.
15042 Unfortunately, since this insn does not reflect very well the actual
15043 semantics of the operation, we need to annotate the insn for the benefit
15044 of DWARF2 frame unwind information. */
15046 emit_multi_reg_push (unsigned long mask)
15049 int num_dwarf_regs;
15053 int dwarf_par_index;
15056 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15057 if (mask & (1 << i))
15060 gcc_assert (num_regs && num_regs <= 16);
15062 /* We don't record the PC in the dwarf frame information. */
15063 num_dwarf_regs = num_regs;
15064 if (mask & (1 << PC_REGNUM))
15067 /* For the body of the insn we are going to generate an UNSPEC in
15068 parallel with several USEs. This allows the insn to be recognized
15069 by the push_multi pattern in the arm.md file.
15071 The body of the insn looks something like this:
15074 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15075 (const_int:SI <num>)))
15076 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15082 For the frame note however, we try to be more explicit and actually
15083 show each register being stored into the stack frame, plus a (single)
15084 decrement of the stack pointer. We do it this way in order to be
15085 friendly to the stack unwinding code, which only wants to see a single
15086 stack decrement per instruction. The RTL we generate for the note looks
15087 something like this:
15090 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15091 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15092 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15093 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
     FIXME: In an ideal world the PRE_MODIFY would not exist and
15098 instead we'd have a parallel expression detailing all
15099 the stores to the various memory addresses so that debug
15100 information is more up-to-date. Remember however while writing
15101 this to take care of the constraints with the push instruction.
15103 Note also that this has to be taken care of for the VFP registers.
15105 For more see PR43399. */
15107 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15108 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15109 dwarf_par_index = 1;
15111 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15113 if (mask & (1 << i))
15115 reg = gen_rtx_REG (SImode, i);
15117 XVECEXP (par, 0, 0)
15118 = gen_rtx_SET (VOIDmode,
15121 gen_rtx_PRE_MODIFY (Pmode,
15124 (stack_pointer_rtx,
15127 gen_rtx_UNSPEC (BLKmode,
15128 gen_rtvec (1, reg),
15129 UNSPEC_PUSH_MULT));
15131 if (i != PC_REGNUM)
15133 tmp = gen_rtx_SET (VOIDmode,
15134 gen_frame_mem (SImode, stack_pointer_rtx),
15136 RTX_FRAME_RELATED_P (tmp) = 1;
15137 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15145 for (j = 1, i++; j < num_regs; i++)
15147 if (mask & (1 << i))
15149 reg = gen_rtx_REG (SImode, i);
15151 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15153 if (i != PC_REGNUM)
15156 = gen_rtx_SET (VOIDmode,
15159 plus_constant (stack_pointer_rtx,
15162 RTX_FRAME_RELATED_P (tmp) = 1;
15163 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15170 par = emit_insn (par);
15172 tmp = gen_rtx_SET (VOIDmode,
15174 plus_constant (stack_pointer_rtx, -4 * num_regs));
15175 RTX_FRAME_RELATED_P (tmp) = 1;
15176 XVECEXP (dwarf, 0, 0) = tmp;
15178 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15183 /* Calculate the size of the return value that is passed in registers. */
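/* E.g. a function returning int gives 4, and one returning long long
   (DImode) gives 8.  */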
15185 arm_size_return_regs (void)
15187 enum machine_mode mode;
15189 if (crtl->return_rtx != 0)
15190 mode = GET_MODE (crtl->return_rtx);
15192 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15194 return GET_MODE_SIZE (mode);
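/* Emit an insn pushing COUNT FPA registers, starting at BASE_REG, onto
   the stack (12 bytes per XFmode register), and attach a
   REG_FRAME_RELATED_EXPR note spelling out the individual stores for
   the DWARF2 unwinder, in the same style as emit_multi_reg_push
   above.  */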
15198 emit_sfm (int base_reg, int count)
15205 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15206 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15208 reg = gen_rtx_REG (XFmode, base_reg++);
15210 XVECEXP (par, 0, 0)
15211 = gen_rtx_SET (VOIDmode,
15214 gen_rtx_PRE_MODIFY (Pmode,
15217 (stack_pointer_rtx,
15220 gen_rtx_UNSPEC (BLKmode,
15221 gen_rtvec (1, reg),
15222 UNSPEC_PUSH_MULT));
15223 tmp = gen_rtx_SET (VOIDmode,
15224 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15225 RTX_FRAME_RELATED_P (tmp) = 1;
15226 XVECEXP (dwarf, 0, 1) = tmp;
15228 for (i = 1; i < count; i++)
15230 reg = gen_rtx_REG (XFmode, base_reg++);
15231 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15233 tmp = gen_rtx_SET (VOIDmode,
15234 gen_frame_mem (XFmode,
15235 plus_constant (stack_pointer_rtx,
15238 RTX_FRAME_RELATED_P (tmp) = 1;
15239 XVECEXP (dwarf, 0, i + 1) = tmp;
15242 tmp = gen_rtx_SET (VOIDmode,
15244 plus_constant (stack_pointer_rtx, -12 * count));
15246 RTX_FRAME_RELATED_P (tmp) = 1;
15247 XVECEXP (dwarf, 0, 0) = tmp;
15249 par = emit_insn (par);
15250 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15256 /* Return true if the current function needs to save/restore LR. */
15259 thumb_force_lr_save (void)
15261 return !cfun->machine->lr_save_eliminated
15262 && (!leaf_function_p ()
15263 || thumb_far_jump_used_p ()
15264 || df_regs_ever_live_p (LR_REGNUM));
15268 /* Return true if r3 is used by any of the tail call insns in the
15269 current function. */
15272 any_sibcall_uses_r3 (void)
15277 if (!crtl->tail_call_emit)
15279 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15280 if (e->flags & EDGE_SIBCALL)
15282 rtx call = BB_END (e->src);
15283 if (!CALL_P (call))
15284 call = prev_nonnote_nondebug_insn (call);
15285 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15286 if (find_regno_fusage (call, USE, 3))
15293 /* Compute the distance from register FROM to register TO.
15294 These can be the arg pointer (26), the soft frame pointer (25),
15295 the stack pointer (13) or the hard frame pointer (11).
15296 In thumb mode r7 is used as the soft frame pointer, if needed.
15297 Typical stack layout looks like this:
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
15327 For a given function some or all of these stack components
15328 may not be needed, giving rise to the possibility of
15329 eliminating some of the registers.
15331 The values returned by this function must reflect the behavior
15332 of arm_expand_prologue() and arm_compute_save_reg_mask().
15334 The sign of the number returned reflects the direction of stack
15335 growth, so the values are positive for all eliminations except
15336 from the soft frame pointer to the hard frame pointer.
   SFP may point just inside the local variables block to ensure correct
   alignment.  */
15342 /* Calculate stack offsets. These are used to calculate register elimination
15343 offsets and in prologue/epilogue code. Also calculates which registers
15344 should be saved. */
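/* A worked example (all numbers assumed): with no pretend args, r4-r7
   and lr saved (20 bytes) and 4 bytes of locals, saved_regs is 20,
   soft_frame is bumped from 20 to 24 for doubleword alignment,
   locals_base becomes 28, and outgoing_args is rounded up to 32 so
   that SP leaves the prologue doubleword aligned.  */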
15346 static arm_stack_offsets *
15347 arm_get_frame_offsets (void)
15349 struct arm_stack_offsets *offsets;
15350 unsigned long func_type;
15354 HOST_WIDE_INT frame_size;
15357 offsets = &cfun->machine->stack_offsets;
15359 /* We need to know if we are a leaf function. Unfortunately, it
15360 is possible to be called after start_sequence has been called,
15361 which causes get_insns to return the insns for the sequence,
15362 not the function, which will cause leaf_function_p to return
15363 the incorrect result.
     However, we only need to know about leaf functions once reload has
     completed, and the
15366 frame size cannot be changed after that time, so we can safely
15367 use the cached value. */
15369 if (reload_completed)
  /* Initially this is the size of the local variables.  It will be translated
15373 into an offset once we have determined the size of preceding data. */
15374 frame_size = ROUND_UP_WORD (get_frame_size ());
15376 leaf = leaf_function_p ();
15378 /* Space for variadic functions. */
15379 offsets->saved_args = crtl->args.pretend_args_size;
15381 /* In Thumb mode this is incorrect, but never used. */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
		   + arm_compute_static_chain_stack_bytes ();
15387 unsigned int regno;
15389 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15390 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15391 saved = core_saved;
15393 /* We know that SP will be doubleword aligned on entry, and we must
15394 preserve that condition at any subroutine call. We also require the
15395 soft frame pointer to be doubleword aligned. */
15397 if (TARGET_REALLY_IWMMXT)
15399 /* Check for the call-saved iWMMXt registers. */
15400 for (regno = FIRST_IWMMXT_REGNUM;
15401 regno <= LAST_IWMMXT_REGNUM;
15403 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15407 func_type = arm_current_func_type ();
15408 if (! IS_VOLATILE (func_type))
15410 /* Space for saved FPA registers. */
15411 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15412 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15415 /* Space for saved VFP registers. */
15416 if (TARGET_HARD_FLOAT && TARGET_VFP)
15417 saved += arm_get_vfp_saved_size ();
15420 else /* TARGET_THUMB1 */
15422 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15423 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15424 saved = core_saved;
15425 if (TARGET_BACKTRACE)
15429 /* Saved registers include the stack frame. */
  offsets->saved_regs = offsets->saved_args + saved
			+ arm_compute_static_chain_stack_bytes ();
15432 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
15435 if (leaf && frame_size == 0
15436 /* However if it calls alloca(), we have a dynamically allocated
15437 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15438 && ! cfun->calls_alloca)
15440 offsets->outgoing_args = offsets->soft_frame;
15441 offsets->locals_base = offsets->soft_frame;
15445 /* Ensure SFP has the correct alignment. */
15446 if (ARM_DOUBLEWORD_ALIGN
15447 && (offsets->soft_frame & 7))
15449 offsets->soft_frame += 4;
15450 /* Try to align stack by pushing an extra reg. Don't bother doing this
15451 when there is a stack frame as the alignment will be rolled into
15452 the normal stack adjustment. */
15453 if (frame_size + crtl->outgoing_args_size == 0)
15457 /* If it is safe to use r3, then do so. This sometimes
15458 generates better code on Thumb-2 by avoiding the need to
15459 use 32-bit push/pop instructions. */
15460 if (! any_sibcall_uses_r3 ()
15461 && arm_size_return_regs () <= 12
15462 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15467 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15469 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15478 offsets->saved_regs += 4;
15479 offsets->saved_regs_mask |= (1 << reg);
15484 offsets->locals_base = offsets->soft_frame + frame_size;
15485 offsets->outgoing_args = (offsets->locals_base
15486 + crtl->outgoing_args_size);
15488 if (ARM_DOUBLEWORD_ALIGN)
15490 /* Ensure SP remains doubleword aligned. */
15491 if (offsets->outgoing_args & 7)
15492 offsets->outgoing_args += 4;
15493 gcc_assert (!(offsets->outgoing_args & 7));
15500 /* Calculate the relative offsets for the different stack pointers. Positive
15501 offsets are in the direction of stack growth. */
15504 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15506 arm_stack_offsets *offsets;
15508 offsets = arm_get_frame_offsets ();
15510 /* OK, now we have enough information to compute the distances.
15511 There must be an entry in these switch tables for each pair
15512 of registers in ELIMINABLE_REGS, even if some of the entries
15513 seem to be redundant or useless. */
15516 case ARG_POINTER_REGNUM:
15519 case THUMB_HARD_FRAME_POINTER_REGNUM:
15522 case FRAME_POINTER_REGNUM:
15523 /* This is the reverse of the soft frame pointer
15524 to hard frame pointer elimination below. */
15525 return offsets->soft_frame - offsets->saved_args;
15527 case ARM_HARD_FRAME_POINTER_REGNUM:
15528 /* This is only non-zero in the case where the static chain register
15529 is stored above the frame. */
15530 return offsets->frame - offsets->saved_args - 4;
15532 case STACK_POINTER_REGNUM:
15533 /* If nothing has been pushed on the stack at all
15534 then this will return -4. This *is* correct! */
15535 return offsets->outgoing_args - (offsets->saved_args + 4);
15538 gcc_unreachable ();
15540 gcc_unreachable ();
15542 case FRAME_POINTER_REGNUM:
15545 case THUMB_HARD_FRAME_POINTER_REGNUM:
15548 case ARM_HARD_FRAME_POINTER_REGNUM:
15549 /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer points to the bottom entry
15551 in the stack frame. If there is no stack frame at all,
15552 then they are identical. */
15554 return offsets->frame - offsets->soft_frame;
15556 case STACK_POINTER_REGNUM:
15557 return offsets->outgoing_args - offsets->soft_frame;
15560 gcc_unreachable ();
15562 gcc_unreachable ();
15565 /* You cannot eliminate from the stack pointer.
15566 In theory you could eliminate from the hard frame
15567 pointer to the stack pointer, but this will never
15568 happen, since if a stack frame is not needed the
15569 hard frame pointer will never be used. */
15570 gcc_unreachable ();
15574 /* Given FROM and TO register numbers, say whether this elimination is
15575 allowed. Frame pointer elimination is automatically handled.
15577 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15578 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15579 pointer, we must eliminate FRAME_POINTER_REGNUM into
15580 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15581 ARG_POINTER_REGNUM. */
15584 arm_can_eliminate (const int from, const int to)
15586 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15587 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15588 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15589 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15593 /* Emit RTL to save coprocessor registers on function entry. Returns the
15594 number of bytes pushed. */
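/* The per-register costs of the saves emitted below: 8 bytes for each
   iWMMXt register (V2SImode via a pre-decrement store), 12 bytes for
   each FPA register (XFmode, sfm in blocks of up to four), and 8 bytes
   for each VFP double register, pushed a contiguous block at a time
   with vfp_emit_fstmd.  */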
arm_save_coproc_regs (void)
15599 int saved_size = 0;
15601 unsigned start_reg;
15604 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15605 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15607 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15608 insn = gen_rtx_MEM (V2SImode, insn);
15609 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15610 RTX_FRAME_RELATED_P (insn) = 1;
  /* Save any floating point call-saved registers used by this
     function.  */
15616 if (TARGET_FPA_EMU2)
15618 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15619 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15621 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15622 insn = gen_rtx_MEM (XFmode, insn);
15623 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15624 RTX_FRAME_RELATED_P (insn) = 1;
15630 start_reg = LAST_FPA_REGNUM;
15632 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15634 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15636 if (start_reg - reg == 3)
15638 insn = emit_sfm (reg, 4);
15639 RTX_FRAME_RELATED_P (insn) = 1;
15641 start_reg = reg - 1;
15646 if (start_reg != reg)
15648 insn = emit_sfm (reg + 1, start_reg - reg);
15649 RTX_FRAME_RELATED_P (insn) = 1;
15650 saved_size += (start_reg - reg) * 12;
15652 start_reg = reg - 1;
15656 if (start_reg != reg)
15658 insn = emit_sfm (reg + 1, start_reg - reg);
15659 saved_size += (start_reg - reg) * 12;
15660 RTX_FRAME_RELATED_P (insn) = 1;
15663 if (TARGET_HARD_FLOAT && TARGET_VFP)
15665 start_reg = FIRST_VFP_REGNUM;
15667 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15669 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15670 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15672 if (start_reg != reg)
15673 saved_size += vfp_emit_fstmd (start_reg,
15674 (reg - start_reg) / 2);
15675 start_reg = reg + 2;
15678 if (start_reg != reg)
15679 saved_size += vfp_emit_fstmd (start_reg,
15680 (reg - start_reg) / 2);
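/* Illustrative example (not from the original source): if d8, d9 and d11
   are the only live call-saved VFP registers, the loop above finds the
   dead pair at d10, flushes the run {d8-d9} with one vfp_emit_fstmd call,
   and the trailing test flushes {d11} with a second, accumulating each
   call's return value into saved_size.  */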
15686 /* Set the Thumb frame pointer from the stack pointer. */
15689 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15691 HOST_WIDE_INT amount;
15694 amount = offsets->outgoing_args - offsets->locals_base;
15696 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15697 stack_pointer_rtx, GEN_INT (amount)));
15700 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15701 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15702 expects the first two operands to be the same. */
15705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15707 hard_frame_pointer_rtx));
15711 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15712 hard_frame_pointer_rtx,
15713 stack_pointer_rtx));
15715 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15716 plus_constant (stack_pointer_rtx, amount));
15717 RTX_FRAME_RELATED_P (dwarf) = 1;
15718 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15721 RTX_FRAME_RELATED_P (insn) = 1;
15724 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15727 arm_expand_prologue (void)
15732 unsigned long live_regs_mask;
15733 unsigned long func_type;
15735 int saved_pretend_args = 0;
15736 int saved_regs = 0;
15737 unsigned HOST_WIDE_INT args_to_push;
15738 arm_stack_offsets *offsets;
15740 func_type = arm_current_func_type ();
15742 /* Naked functions don't have prologues. */
15743 if (IS_NAKED (func_type))
15746 /* Make a copy of c_f_p_a_s (crtl->args.pretend_args_size) as we may need to modify it locally. */
15747 args_to_push = crtl->args.pretend_args_size;
15749 /* Compute which registers we will have to save onto the stack. */
15750 offsets = arm_get_frame_offsets ();
15751 live_regs_mask = offsets->saved_regs_mask;
15753 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15755 if (IS_STACKALIGN (func_type))
15760 /* Handle a word-aligned stack pointer. We generate the following:
15762 mov r0, sp
15763 bic r1, r0, #7
15764 mov sp, r1
15765 <save and restore r0 in normal prologue/epilogue>
15766 mov r1, r0
15767 mov sp, r1
15769 The unwinder doesn't need to know about the stack realignment.
15770 Just tell it we saved SP in r0. */
15771 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15773 r0 = gen_rtx_REG (SImode, 0);
15774 r1 = gen_rtx_REG (SImode, 1);
15775 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15776 compiler won't choke. */
15777 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15778 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15779 insn = gen_movsi (r0, stack_pointer_rtx);
15780 RTX_FRAME_RELATED_P (insn) = 1;
15781 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15783 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15784 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15787 /* For APCS frames, if IP register is clobbered
15788 when creating frame, save that register in a special
15790 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15792 if (IS_INTERRUPT (func_type))
15794 /* Interrupt functions must not corrupt any registers.
15795 Creating a frame pointer however, corrupts the IP
15796 register, so we must push it first. */
15797 insn = emit_multi_reg_push (1 << IP_REGNUM);
15799 /* Do not set RTX_FRAME_RELATED_P on this insn.
15800 The dwarf stack unwinding code only wants to see one
15801 stack decrement per function, and this is not it. If
15802 this instruction is labeled as being part of the frame
15803 creation sequence then dwarf2out_frame_debug_expr will
15804 die when it encounters the assignment of IP to FP
15805 later on, since the use of SP here establishes SP as
15806 the CFA register and not IP.
15808 Anyway this instruction is not really part of the stack
15809 frame creation although it is part of the prologue. */
15811 else if (IS_NESTED (func_type))
15813 /* The static chain register is the same as the IP register
15814 used as a scratch register during stack frame creation.
15815 To get around this, we need to find somewhere to store IP
15816 whilst the frame is being created. We try the following
15817 places in order:
15819 1. The last argument register.
15820 2. A slot on the stack above the frame. (This only
15821 works if the function is not a varargs function).
15822 3. Register r3, after pushing the argument registers
15823 onto the stack.
15825 Note - we only need to tell the dwarf2 backend about the SP
15826 adjustment in the second variant; the static chain register
15827 doesn't need to be unwound, as it doesn't contain a value
15828 inherited from the caller. */
15830 if (df_regs_ever_live_p (3) == false)
15831 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15832 else if (args_to_push == 0)
15836 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15839 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15840 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15843 /* Just tell the dwarf backend that we adjusted SP. */
15844 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15845 plus_constant (stack_pointer_rtx,
15847 RTX_FRAME_RELATED_P (insn) = 1;
15848 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15852 /* Store the args on the stack. */
15853 if (cfun->machine->uses_anonymous_args)
15854 insn = emit_multi_reg_push
15855 ((0xf0 >> (args_to_push / 4)) & 0xf);
15858 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15859 GEN_INT (- args_to_push)));
15861 RTX_FRAME_RELATED_P (insn) = 1;
15863 saved_pretend_args = 1;
15864 fp_offset = args_to_push;
15867 /* Now reuse r3 to preserve IP. */
15868 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15872 insn = emit_set_insn (ip_rtx,
15873 plus_constant (stack_pointer_rtx, fp_offset));
15874 RTX_FRAME_RELATED_P (insn) = 1;
15879 /* Push the argument registers, or reserve space for them. */
15880 if (cfun->machine->uses_anonymous_args)
15881 insn = emit_multi_reg_push
15882 ((0xf0 >> (args_to_push / 4)) & 0xf);
15885 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15886 GEN_INT (- args_to_push)));
15887 RTX_FRAME_RELATED_P (insn) = 1;
15890 /* If this is an interrupt service routine, and the link register
15891 is going to be pushed, and we're not generating the extra
15892 push of IP (needed when a frame is needed and the frame layout is APCS),
15893 subtracting four from LR now will mean that the function return
15894 can be done with a single instruction. */
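/* Illustrative example (not from the original source): an IRQ enters with
   LR holding the interrupted instruction's address plus four, so once LR
   has been adjusted here the epilogue can restore straight into PC --
   e.g. a single "ldmfd sp!, {r0-r3, pc}^" -- instead of needing a
   separate "subs pc, lr, #4".  */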
15895 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15896 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15897 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15900 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15902 emit_set_insn (lr, plus_constant (lr, -4));
15905 if (live_regs_mask)
15907 saved_regs += bit_count (live_regs_mask) * 4;
15908 if (optimize_size && !frame_pointer_needed
15909 && saved_regs == offsets->saved_regs - offsets->saved_args)
15911 /* If no coprocessor registers are being pushed and we don't have
15912 to worry about a frame pointer then push extra registers to
15913 create the stack frame. This is done in a way that does not
15914 alter the frame layout, so is independent of the epilogue. */
15918 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15920 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15921 if (frame && n * 4 >= frame)
15924 live_regs_mask |= (1 << n) - 1;
15925 saved_regs += frame;
15928 insn = emit_multi_reg_push (live_regs_mask);
15929 RTX_FRAME_RELATED_P (insn) = 1;
15932 if (! IS_VOLATILE (func_type))
15933 saved_regs += arm_save_coproc_regs ();
15935 if (frame_pointer_needed && TARGET_ARM)
15937 /* Create the new frame pointer. */
15938 if (TARGET_APCS_FRAME)
15940 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15941 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15942 RTX_FRAME_RELATED_P (insn) = 1;
15944 if (IS_NESTED (func_type))
15946 /* Recover the static chain register. */
15947 if (!df_regs_ever_live_p (3)
15948 || saved_pretend_args)
15949 insn = gen_rtx_REG (SImode, 3);
15950 else /* if (crtl->args.pretend_args_size == 0) */
15952 insn = plus_constant (hard_frame_pointer_rtx, 4);
15953 insn = gen_frame_mem (SImode, insn);
15955 emit_set_insn (ip_rtx, insn);
15956 /* Add a USE to stop propagate_one_insn() from barfing. */
15957 emit_insn (gen_prologue_use (ip_rtx));
15962 insn = GEN_INT (saved_regs - 4);
15963 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15964 stack_pointer_rtx, insn));
15965 RTX_FRAME_RELATED_P (insn) = 1;
15969 if (flag_stack_usage)
15970 current_function_static_stack_size
15971 = offsets->outgoing_args - offsets->saved_args;
15973 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15975 /* This add can produce multiple insns for a large constant, so we
15976 need to get tricky. */
15977 rtx last = get_last_insn ();
15979 amount = GEN_INT (offsets->saved_args + saved_regs
15980 - offsets->outgoing_args);
15982 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15986 last = last ? NEXT_INSN (last) : get_insns ();
15987 RTX_FRAME_RELATED_P (last) = 1;
15989 while (last != insn);
15991 /* If the frame pointer is needed, emit a special barrier that
15992 will prevent the scheduler from moving stores to the frame
15993 before the stack adjustment. */
15994 if (frame_pointer_needed)
15995 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15996 hard_frame_pointer_rtx));
16000 if (frame_pointer_needed && TARGET_THUMB2)
16001 thumb_set_frame_pointer (offsets);
16003 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16005 unsigned long mask;
16007 mask = live_regs_mask;
16008 mask &= THUMB2_WORK_REGS;
16009 if (!IS_NESTED (func_type))
16010 mask |= (1 << IP_REGNUM);
16011 arm_load_pic_register (mask);
16014 /* If we are profiling, make sure no instructions are scheduled before
16015 the call to mcount. Similarly if the user has requested no
16016 scheduling in the prolog. Similarly if we want non-call exceptions
16017 using the EABI unwinder, to prevent faulting instructions from being
16018 swapped with a stack adjustment. */
16019 if (crtl->profile || !TARGET_SCHED_PROLOG
16020 || (arm_except_unwind_info (&global_options) == UI_TARGET
16021 && cfun->can_throw_non_call_exceptions))
16022 emit_insn (gen_blockage ());
16024 /* If the link register is being kept alive, with the return address in it,
16025 then make sure that it does not get reused by the ce2 pass. */
16026 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16027 cfun->machine->lr_save_eliminated = 1;
16030 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16032 arm_print_condition (FILE *stream)
16034 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16036 /* Branch conversion is not implemented for Thumb-2. */
16039 output_operand_lossage ("predicated Thumb instruction");
16042 if (current_insn_predicate != NULL)
16044 output_operand_lossage
16045 ("predicated instruction in conditional sequence");
16049 fputs (arm_condition_codes[arm_current_cc], stream);
16051 else if (current_insn_predicate)
16053 enum arm_cond_code code;
16057 output_operand_lossage ("predicated Thumb instruction");
16061 code = get_arm_condition_code (current_insn_predicate);
16062 fputs (arm_condition_codes[code], stream);
16067 /* If CODE is 'd', then X is a condition operand and the instruction
16068 should only be executed if the condition is true.
16069 If CODE is 'D', then X is a condition operand and the instruction
16070 should only be executed if the condition is false: however, if the mode
16071 of the comparison is CCFPEmode, then always execute the instruction -- we
16072 do this because in these circumstances !GE does not necessarily imply LT;
16073 in these cases the instruction pattern will take care to make sure that
16074 an instruction containing %d will follow, thereby undoing the effects of
16075 doing this instruction unconditionally.
16076 If CODE is 'N' then X is a floating point operand that must be negated
16077 before output.
16078 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16079 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
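/* Worked examples (illustrative, not from the original source): for
   X == (const_int 10), 'B' prints -11, the sign-extended bitwise
   inverse; for a DImode REG starting at r4, 'M' prints "{r4-r5}",
   since ARM_NUM_REGS counts two words for a doubleword mode.  */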
16081 arm_print_operand (FILE *stream, rtx x, int code)
16086 fputs (ASM_COMMENT_START, stream);
16090 fputs (user_label_prefix, stream);
16094 fputs (REGISTER_PREFIX, stream);
16098 arm_print_condition (stream);
16102 /* Nothing in unified syntax, otherwise the current condition code. */
16103 if (!TARGET_UNIFIED_ASM)
16104 arm_print_condition (stream);
16108 /* The current condition code in unified syntax, otherwise nothing. */
16109 if (TARGET_UNIFIED_ASM)
16110 arm_print_condition (stream);
16114 /* The current condition code for a condition code setting instruction.
16115 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16116 if (TARGET_UNIFIED_ASM)
16118 fputc('s', stream);
16119 arm_print_condition (stream);
16123 arm_print_condition (stream);
16124 fputc('s', stream);
16129 /* If the instruction is conditionally executed then print
16130 the current condition code, otherwise print 's'. */
16131 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16132 if (current_insn_predicate)
16133 arm_print_condition (stream);
16135 fputc('s', stream);
16138 /* %# is a "break" sequence. It doesn't output anything, but is used to
16139 separate e.g. operand numbers from following text, if that text consists
16140 of further digits which we don't want to be part of the operand
16141 number. */
16148 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16149 r = real_value_negate (&r);
16150 fprintf (stream, "%s", fp_const_from_val (&r));
16154 /* An integer or symbol address without a preceding # sign. */
16156 switch (GET_CODE (x))
16159 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16163 output_addr_const (stream, x);
16167 gcc_unreachable ();
16172 if (GET_CODE (x) == CONST_INT)
16175 val = ARM_SIGN_EXTEND (~INTVAL (x));
16176 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16180 putc ('~', stream);
16181 output_addr_const (stream, x);
16186 /* The low 16 bits of an immediate constant. */
16187 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16191 fprintf (stream, "%s", arithmetic_instr (x, 1));
16194 /* Truncate Cirrus shift counts. */
16196 if (GET_CODE (x) == CONST_INT)
16198 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16201 arm_print_operand (stream, x, 0);
16205 fprintf (stream, "%s", arithmetic_instr (x, 0));
16213 if (!shift_operator (x, SImode))
16215 output_operand_lossage ("invalid shift operand");
16219 shift = shift_op (x, &val);
16223 fprintf (stream, ", %s ", shift);
16225 arm_print_operand (stream, XEXP (x, 1), 0);
16227 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16232 /* An explanation of the 'Q', 'R' and 'H' register operands:
16234 In a pair of registers containing a DI or DF value the 'Q'
16235 operand returns the register number of the register containing
16236 the least significant part of the value. The 'R' operand returns
16237 the register number of the register containing the most
16238 significant part of the value.
16240 The 'H' operand returns the higher of the two register numbers.
16241 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16242 same as the 'Q' operand, since the most significant part of the
16243 value is held in the lower number register. The reverse is true
16244 on systems where WORDS_BIG_ENDIAN is false.
16246 The purpose of these operands is to distinguish between cases
16247 where the endian-ness of the values is important (for example
16248 when they are added together), and cases where the endian-ness
16249 is irrelevant, but the order of register operations is important.
16250 For example when loading a value from memory into a register
16251 pair, the endian-ness does not matter. Provided that the value
16252 from the lower memory address is put into the lower numbered
16253 register, and the value from the higher address is put into the
16254 higher numbered register, the load will work regardless of whether
16255 the value being loaded is big-wordian or little-wordian. The
16256 order of the two register loads can matter however, if the address
16257 of the memory location is actually held in one of the registers
16258 being overwritten by the load.
16260 The 'Q' and 'R' constraints are also available for 64-bit
16261 constants. */
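/* Worked example (illustrative): for a DImode value in the pair {r0, r1}
   with WORDS_BIG_ENDIAN false, 'Q' prints r0 (least significant word),
   'R' prints r1 (most significant word) and 'H' prints r1; with
   WORDS_BIG_ENDIAN true, 'Q' and 'H' both print r1 while 'R' prints r0.  */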
16263 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16265 rtx part = gen_lowpart (SImode, x);
16266 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16270 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16272 output_operand_lossage ("invalid operand for code '%c'", code);
16276 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16280 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16282 enum machine_mode mode = GET_MODE (x);
16285 if (mode == VOIDmode)
16287 part = gen_highpart_mode (SImode, mode, x);
16288 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16292 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16294 output_operand_lossage ("invalid operand for code '%c'", code);
16298 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16302 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16304 output_operand_lossage ("invalid operand for code '%c'", code);
16308 asm_fprintf (stream, "%r", REGNO (x) + 1);
16312 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16314 output_operand_lossage ("invalid operand for code '%c'", code);
16318 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16322 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16324 output_operand_lossage ("invalid operand for code '%c'", code);
16328 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16332 asm_fprintf (stream, "%r",
16333 GET_CODE (XEXP (x, 0)) == REG
16334 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16338 asm_fprintf (stream, "{%r-%r}",
16340 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16343 /* Like 'M', but writing doubleword vector registers, for use by Neon
16344 insns. */
16347 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16348 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16350 asm_fprintf (stream, "{d%d}", regno);
16352 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16357 /* CONST_TRUE_RTX means always -- that's the default. */
16358 if (x == const_true_rtx)
16361 if (!COMPARISON_P (x))
16363 output_operand_lossage ("invalid operand for code '%c'", code);
16367 fputs (arm_condition_codes[get_arm_condition_code (x)],
16372 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16373 want to do that. */
16374 if (x == const_true_rtx)
16376 output_operand_lossage ("instruction never executed");
16379 if (!COMPARISON_P (x))
16381 output_operand_lossage ("invalid operand for code '%c'", code);
16385 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16386 (get_arm_condition_code (x))],
16390 /* Cirrus registers can be accessed in a variety of ways:
16391 single floating point (f)
16392 double floating point (d)
16393 32bit integer (fx)
16394 64bit integer (dx). */
16395 case 'W': /* Cirrus register in F mode. */
16396 case 'X': /* Cirrus register in D mode. */
16397 case 'Y': /* Cirrus register in FX mode. */
16398 case 'Z': /* Cirrus register in DX mode. */
16399 gcc_assert (GET_CODE (x) == REG
16400 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16402 fprintf (stream, "mv%s%s",
16404 : code == 'X' ? "d"
16405 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16409 /* Print cirrus register in the mode specified by the register's mode. */
16412 int mode = GET_MODE (x);
16414 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16416 output_operand_lossage ("invalid operand for code '%c'", code);
16420 fprintf (stream, "mv%s%s",
16421 mode == DFmode ? "d"
16422 : mode == SImode ? "fx"
16423 : mode == DImode ? "dx"
16424 : "f", reg_names[REGNO (x)] + 2);
16430 if (GET_CODE (x) != REG
16431 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16432 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16433 /* Bad value for wCG register number. */
16435 output_operand_lossage ("invalid operand for code '%c'", code);
16440 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16443 /* Print an iWMMXt control register name. */
16445 if (GET_CODE (x) != CONST_INT
16447 || INTVAL (x) >= 16)
16448 /* Bad value for wC register number. */
16450 output_operand_lossage ("invalid operand for code '%c'", code);
16456 static const char * wc_reg_names [16] =
16458 "wCID", "wCon", "wCSSF", "wCASF",
16459 "wC4", "wC5", "wC6", "wC7",
16460 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16461 "wC12", "wC13", "wC14", "wC15"
16464 fprintf (stream, wc_reg_names [INTVAL (x)]);
16468 /* Print the high single-precision register of a VFP double-precision
16472 int mode = GET_MODE (x);
16475 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16477 output_operand_lossage ("invalid operand for code '%c'", code);
16482 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16484 output_operand_lossage ("invalid operand for code '%c'", code);
16488 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16492 /* Print a VFP/Neon double precision or quad precision register name. */
16496 int mode = GET_MODE (x);
16497 int is_quad = (code == 'q');
16500 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16502 output_operand_lossage ("invalid operand for code '%c'", code);
16506 if (GET_CODE (x) != REG
16507 || !IS_VFP_REGNUM (REGNO (x)))
16509 output_operand_lossage ("invalid operand for code '%c'", code);
16514 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16515 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16517 output_operand_lossage ("invalid operand for code '%c'", code);
16521 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16522 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16526 /* These two codes print the low/high doubleword register of a Neon quad
16527 register, respectively. For pair-structure types, they can also print
16528 low/high quadword registers. */
16532 int mode = GET_MODE (x);
16535 if ((GET_MODE_SIZE (mode) != 16
16536 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16538 output_operand_lossage ("invalid operand for code '%c'", code);
16543 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16545 output_operand_lossage ("invalid operand for code '%c'", code);
16549 if (GET_MODE_SIZE (mode) == 16)
16550 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16551 + (code == 'f' ? 1 : 0));
16553 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16554 + (code == 'f' ? 1 : 0));
16558 /* Print a VFPv3 floating-point constant, represented as an integer
16559 index. */
16562 int index = vfp3_const_double_index (x);
16563 gcc_assert (index != -1);
16564 fprintf (stream, "%d", index);
16568 /* Print bits representing opcode features for Neon.
16570 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16571 and polynomials as unsigned.
16573 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16575 Bit 2 is 1 for rounding functions, 0 otherwise. */
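/* Worked example (illustrative): BITS == 5 (binary 101) describes a
   signed rounding operation, so the type specifier below prints
   "uspf"[5 & 3] == 's' and the rounding specifier prints "r";
   BITS == 3 (binary 011) describes a float and prints 'f'.  */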
16577 /* Identify the type as 's', 'u', 'p' or 'f'. */
16580 HOST_WIDE_INT bits = INTVAL (x);
16581 fputc ("uspf"[bits & 3], stream);
16585 /* Likewise, but signed and unsigned integers are both 'i'. */
16588 HOST_WIDE_INT bits = INTVAL (x);
16589 fputc ("iipf"[bits & 3], stream);
16593 /* As for 'T', but emit 'u' instead of 'p'. */
16596 HOST_WIDE_INT bits = INTVAL (x);
16597 fputc ("usuf"[bits & 3], stream);
16601 /* Bit 2: rounding (vs none). */
16604 HOST_WIDE_INT bits = INTVAL (x);
16605 fputs ((bits & 4) != 0 ? "r" : "", stream);
16609 /* Memory operand for vld1/vst1 instruction. */
16613 bool postinc = FALSE;
16614 unsigned align, memsize, align_bits;
16616 gcc_assert (GET_CODE (x) == MEM);
16617 addr = XEXP (x, 0);
16618 if (GET_CODE (addr) == POST_INC)
16621 addr = XEXP (addr, 0);
16623 asm_fprintf (stream, "[%r", REGNO (addr));
16625 /* We know the alignment of this access, so we can emit a hint in the
16626 instruction (for some alignments) as an aid to the memory subsystem
16627 of the target. */
16628 align = MEM_ALIGN (x) >> 3;
16629 memsize = INTVAL (MEM_SIZE (x));
16631 /* Only certain alignment specifiers are supported by the hardware. */
16632 if (memsize == 16 && (align % 32) == 0)
16633 align_bits = 256;
16634 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16635 align_bits = 128;
16636 else if ((align % 8) == 0)
16637 align_bits = 64;
16638 else
16639 align_bits = 0;
16641 if (align_bits != 0)
16642 asm_fprintf (stream, ":%d", align_bits);
16644 asm_fprintf (stream, "]");
16647 fputs("!", stream);
16655 gcc_assert (GET_CODE (x) == MEM);
16656 addr = XEXP (x, 0);
16657 gcc_assert (GET_CODE (addr) == REG);
16658 asm_fprintf (stream, "[%r]", REGNO (addr));
16662 /* Translate an S register number into a D register number and element index. */
16665 int mode = GET_MODE (x);
16668 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16670 output_operand_lossage ("invalid operand for code '%c'", code);
16675 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16677 output_operand_lossage ("invalid operand for code '%c'", code);
16681 regno = regno - FIRST_VFP_REGNUM;
16682 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16686 /* Register specifier for vld1.16/vst1.16. Translate the S register
16687 number into a D register number and element index. */
16690 int mode = GET_MODE (x);
16693 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16695 output_operand_lossage ("invalid operand for code '%c'", code);
16700 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16702 output_operand_lossage ("invalid operand for code '%c'", code);
16706 regno = regno - FIRST_VFP_REGNUM;
16707 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
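/* Worked example (illustrative): for the single-precision register s5
   the 32-bit case above prints "d2[1]", s5 being the high half of d2,
   while the vld1.16/vst1.16 case prints "d2[2]", because its lane index
   counts 16-bit elements.  */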
16714 output_operand_lossage ("missing operand");
16718 switch (GET_CODE (x))
16721 asm_fprintf (stream, "%r", REGNO (x));
16725 output_memory_reference_mode = GET_MODE (x);
16726 output_address (XEXP (x, 0));
16733 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16734 sizeof (fpstr), 0, 1);
16735 fprintf (stream, "#%s", fpstr);
16738 fprintf (stream, "#%s", fp_immediate_constant (x));
16742 gcc_assert (GET_CODE (x) != NEG);
16743 fputc ('#', stream);
16744 if (GET_CODE (x) == HIGH)
16746 fputs (":lower16:", stream);
16750 output_addr_const (stream, x);
16756 /* Target hook for printing a memory address. */
16758 arm_print_operand_address (FILE *stream, rtx x)
16762 int is_minus = GET_CODE (x) == MINUS;
16764 if (GET_CODE (x) == REG)
16765 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16766 else if (GET_CODE (x) == PLUS || is_minus)
16768 rtx base = XEXP (x, 0);
16769 rtx index = XEXP (x, 1);
16770 HOST_WIDE_INT offset = 0;
16771 if (GET_CODE (base) != REG
16772 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16774 /* Ensure that BASE is a register. */
16775 /* (one of them must be). */
16776 /* Also ensure the SP is not used as in index register. */
16781 switch (GET_CODE (index))
16784 offset = INTVAL (index);
16787 asm_fprintf (stream, "[%r, #%wd]",
16788 REGNO (base), offset);
16792 asm_fprintf (stream, "[%r, %s%r]",
16793 REGNO (base), is_minus ? "-" : "",
16803 asm_fprintf (stream, "[%r, %s%r",
16804 REGNO (base), is_minus ? "-" : "",
16805 REGNO (XEXP (index, 0)));
16806 arm_print_operand (stream, index, 'S');
16807 fputs ("]", stream);
16812 gcc_unreachable ();
16815 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16816 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16818 extern enum machine_mode output_memory_reference_mode;
16820 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16822 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16823 asm_fprintf (stream, "[%r, #%s%d]!",
16824 REGNO (XEXP (x, 0)),
16825 GET_CODE (x) == PRE_DEC ? "-" : "",
16826 GET_MODE_SIZE (output_memory_reference_mode));
16828 asm_fprintf (stream, "[%r], #%s%d",
16829 REGNO (XEXP (x, 0)),
16830 GET_CODE (x) == POST_DEC ? "-" : "",
16831 GET_MODE_SIZE (output_memory_reference_mode));
16833 else if (GET_CODE (x) == PRE_MODIFY)
16835 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16836 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16837 asm_fprintf (stream, "#%wd]!",
16838 INTVAL (XEXP (XEXP (x, 1), 1)));
16840 asm_fprintf (stream, "%r]!",
16841 REGNO (XEXP (XEXP (x, 1), 1)));
16843 else if (GET_CODE (x) == POST_MODIFY)
16845 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16846 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16847 asm_fprintf (stream, "#%wd",
16848 INTVAL (XEXP (XEXP (x, 1), 1)));
16850 asm_fprintf (stream, "%r",
16851 REGNO (XEXP (XEXP (x, 1), 1)));
16853 else output_addr_const (stream, x);
16857 if (GET_CODE (x) == REG)
16858 asm_fprintf (stream, "[%r]", REGNO (x));
16859 else if (GET_CODE (x) == POST_INC)
16860 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16861 else if (GET_CODE (x) == PLUS)
16863 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16864 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16865 asm_fprintf (stream, "[%r, #%wd]",
16866 REGNO (XEXP (x, 0)),
16867 INTVAL (XEXP (x, 1)));
16869 asm_fprintf (stream, "[%r, %r]",
16870 REGNO (XEXP (x, 0)),
16871 REGNO (XEXP (x, 1)));
16874 output_addr_const (stream, x);
16878 /* Target hook for indicating whether a punctuation character for
16879 TARGET_PRINT_OPERAND is valid. */
16881 arm_print_operand_punct_valid_p (unsigned char code)
16883 return (code == '@' || code == '|' || code == '.'
16884 || code == '(' || code == ')' || code == '#'
16885 || (TARGET_32BIT && (code == '?'))
16886 || (TARGET_THUMB2 && (code == '!'))
16887 || (TARGET_THUMB && (code == '_')));
16888 }
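/* Illustrative usage (not from the original source): in an output
   template such as "mov%?\t%0, %1" the '?' punctuation expands, via
   arm_print_condition above, to the current condition -- giving
   e.g. "moveq" inside a predicated sequence -- and to nothing when
   the insn is unconditional.  */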
16890 /* Target hook for assembling integer objects. The ARM version needs to
16891 handle word-sized values specially. */
16893 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16895 enum machine_mode mode;
16897 if (size == UNITS_PER_WORD && aligned_p)
16899 fputs ("\t.word\t", asm_out_file);
16900 output_addr_const (asm_out_file, x);
16902 /* Mark symbols as position independent. We only do this in the
16903 .text segment, not in the .data segment. */
16904 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16905 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16907 /* See legitimize_pic_address for an explanation of the
16908 TARGET_VXWORKS_RTP check. */
16909 if (TARGET_VXWORKS_RTP
16910 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16911 fputs ("(GOT)", asm_out_file);
16913 fputs ("(GOTOFF)", asm_out_file);
16915 fputc ('\n', asm_out_file);
16919 mode = GET_MODE (x);
16921 if (arm_vector_mode_supported_p (mode))
16925 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16927 units = CONST_VECTOR_NUNITS (x);
16928 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16930 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16931 for (i = 0; i < units; i++)
16933 rtx elt = CONST_VECTOR_ELT (x, i);
16935 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16938 for (i = 0; i < units; i++)
16940 rtx elt = CONST_VECTOR_ELT (x, i);
16941 REAL_VALUE_TYPE rval;
16943 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16946 (rval, GET_MODE_INNER (mode),
16947 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16953 return default_assemble_integer (x, size, aligned_p);
16954 }
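/* Illustrative example (not from the original source): a .text constant
   pool entry for a non-local symbol "foo" is emitted as
   "\t.word\tfoo(GOT)", while a function-local symbol "bar" gets
   "\t.word\tbar(GOTOFF)", matching the relocation choice above.  */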
16957 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16961 if (!TARGET_AAPCS_BASED)
16964 default_named_section_asm_out_constructor
16965 : default_named_section_asm_out_destructor) (symbol, priority);
16969 /* Put these in the .init_array section, using a special relocation. */
16970 if (priority != DEFAULT_INIT_PRIORITY)
16973 sprintf (buf, "%s.%.5u",
16974 is_ctor ? ".init_array" : ".fini_array",
16976 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16983 switch_to_section (s);
16984 assemble_align (POINTER_SIZE);
16985 fputs ("\t.word\t", asm_out_file);
16986 output_addr_const (asm_out_file, symbol);
16987 fputs ("(target1)\n", asm_out_file);
16990 /* Add a function to the list of static constructors. */
16993 arm_elf_asm_constructor (rtx symbol, int priority)
16995 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16998 /* Add a function to the list of static destructors. */
17001 arm_elf_asm_destructor (rtx symbol, int priority)
17003 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17006 /* A finite state machine takes care of noticing whether or not instructions
17007 can be conditionally executed, thus decreasing execution time and code
17008 size by deleting branch instructions. The fsm is controlled by
17009 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17011 /* The states of the fsm controlling condition codes are:
17012 0: normal, do nothing special
17013 1: make ASM_OUTPUT_OPCODE not output this instruction
17014 2: make ASM_OUTPUT_OPCODE not output this instruction
17015 3: make instructions conditional
17016 4: make instructions conditional
17018 State transitions (state->state by whom under condition):
17019 0 -> 1 final_prescan_insn if the `target' is a label
17020 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17021 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17022 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17023 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17024 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17025 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17026 (the target insn is arm_target_insn).
17028 If the jump clobbers the conditions then we use states 2 and 4.
17030 A similar thing can be done with conditional return insns.
17032 XXX In case the `target' is an unconditional branch, this conditionalising
17033 of the instructions always reduces code size, but not always execution
17034 time. But then, I want to reduce the code size to somewhere near what
17035 /bin/cc produces. */
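/* Worked example (illustrative, not from the original source): for

	bne	.L2
	mov	r0, #1
   .L2:

   final_prescan_insn moves the fsm from state 0 to 1, ASM_OUTPUT_OPCODE
   suppresses the bne and enters state 3, the mov is printed as
   "moveq r0, #1", and reaching .L2 returns the fsm to state 0.  */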
17037 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17038 instructions. When a COND_EXEC instruction is seen the subsequent
17039 instructions are scanned so that multiple conditional instructions can be
17040 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17041 specify the length and true/false mask for the IT block. These will be
17042 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
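/* A minimal standalone sketch of that mask encoding (illustrative only,
   not part of the build): bit N of the mask selects 't' when insn N takes
   the base condition and 'e' when it takes the inverse, exactly as
   thumb2_asm_output_opcode decodes it below.  */
#if 0
#include <stdio.h>

static void
it_spelling (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;

  for (n = 0; n < len; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = '\0';
  printf ("i%s\t%s\n", buff, cond);
}

int
main (void)
{
  it_spelling (0xd, 4, "eq");	/* Insns predicated EQ, NE, EQ, EQ
				   print as "itett eq".  */
  return 0;
}
#endif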
17044 /* Returns the index of the ARM condition code string in
17045 `arm_condition_codes'. COMPARISON should be an rtx like
17046 `(eq (...) (...))'. */
17047 static enum arm_cond_code
17048 get_arm_condition_code (rtx comparison)
17050 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17051 enum arm_cond_code code;
17052 enum rtx_code comp_code = GET_CODE (comparison);
17054 if (GET_MODE_CLASS (mode) != MODE_CC)
17055 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17056 XEXP (comparison, 1));
17060 case CC_DNEmode: code = ARM_NE; goto dominance;
17061 case CC_DEQmode: code = ARM_EQ; goto dominance;
17062 case CC_DGEmode: code = ARM_GE; goto dominance;
17063 case CC_DGTmode: code = ARM_GT; goto dominance;
17064 case CC_DLEmode: code = ARM_LE; goto dominance;
17065 case CC_DLTmode: code = ARM_LT; goto dominance;
17066 case CC_DGEUmode: code = ARM_CS; goto dominance;
17067 case CC_DGTUmode: code = ARM_HI; goto dominance;
17068 case CC_DLEUmode: code = ARM_LS; goto dominance;
17069 case CC_DLTUmode: code = ARM_CC;
17072 gcc_assert (comp_code == EQ || comp_code == NE);
17074 if (comp_code == EQ)
17075 return ARM_INVERSE_CONDITION_CODE (code);
17081 case NE: return ARM_NE;
17082 case EQ: return ARM_EQ;
17083 case GE: return ARM_PL;
17084 case LT: return ARM_MI;
17085 default: gcc_unreachable ();
17091 case NE: return ARM_NE;
17092 case EQ: return ARM_EQ;
17093 default: gcc_unreachable ();
17099 case NE: return ARM_MI;
17100 case EQ: return ARM_PL;
17101 default: gcc_unreachable ();
17106 /* These encodings assume that AC=1 in the FPA system control
17107 byte. This allows us to handle all cases except UNEQ and
17108 LTGT. */
17111 case GE: return ARM_GE;
17112 case GT: return ARM_GT;
17113 case LE: return ARM_LS;
17114 case LT: return ARM_MI;
17115 case NE: return ARM_NE;
17116 case EQ: return ARM_EQ;
17117 case ORDERED: return ARM_VC;
17118 case UNORDERED: return ARM_VS;
17119 case UNLT: return ARM_LT;
17120 case UNLE: return ARM_LE;
17121 case UNGT: return ARM_HI;
17122 case UNGE: return ARM_PL;
17123 /* UNEQ and LTGT do not have a representation. */
17124 case UNEQ: /* Fall through. */
17125 case LTGT: /* Fall through. */
17126 default: gcc_unreachable ();
17132 case NE: return ARM_NE;
17133 case EQ: return ARM_EQ;
17134 case GE: return ARM_LE;
17135 case GT: return ARM_LT;
17136 case LE: return ARM_GE;
17137 case LT: return ARM_GT;
17138 case GEU: return ARM_LS;
17139 case GTU: return ARM_CC;
17140 case LEU: return ARM_CS;
17141 case LTU: return ARM_HI;
17142 default: gcc_unreachable ();
17148 case LTU: return ARM_CS;
17149 case GEU: return ARM_CC;
17150 default: gcc_unreachable ();
17156 case NE: return ARM_NE;
17157 case EQ: return ARM_EQ;
17158 case GEU: return ARM_CS;
17159 case GTU: return ARM_HI;
17160 case LEU: return ARM_LS;
17161 case LTU: return ARM_CC;
17162 default: gcc_unreachable ();
17168 case GE: return ARM_GE;
17169 case LT: return ARM_LT;
17170 case GEU: return ARM_CS;
17171 case LTU: return ARM_CC;
17172 default: gcc_unreachable ();
17178 case NE: return ARM_NE;
17179 case EQ: return ARM_EQ;
17180 case GE: return ARM_GE;
17181 case GT: return ARM_GT;
17182 case LE: return ARM_LE;
17183 case LT: return ARM_LT;
17184 case GEU: return ARM_CS;
17185 case GTU: return ARM_HI;
17186 case LEU: return ARM_LS;
17187 case LTU: return ARM_CC;
17188 default: gcc_unreachable ();
17191 default: gcc_unreachable ();
17195 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17196 instructions. */
17198 thumb2_final_prescan_insn (rtx insn)
17200 rtx first_insn = insn;
17201 rtx body = PATTERN (insn);
17203 enum arm_cond_code code;
17207 /* Remove the previous insn from the count of insns to be output. */
17208 if (arm_condexec_count)
17209 arm_condexec_count--;
17211 /* Nothing to do if we are already inside a conditional block. */
17212 if (arm_condexec_count)
17215 if (GET_CODE (body) != COND_EXEC)
17218 /* Conditional jumps are implemented directly. */
17219 if (GET_CODE (insn) == JUMP_INSN)
17222 predicate = COND_EXEC_TEST (body);
17223 arm_current_cc = get_arm_condition_code (predicate);
17225 n = get_attr_ce_count (insn);
17226 arm_condexec_count = 1;
17227 arm_condexec_mask = (1 << n) - 1;
17228 arm_condexec_masklen = n;
17229 /* See if subsequent instructions can be combined into the same block. */
17232 insn = next_nonnote_insn (insn);
17234 /* Jumping into the middle of an IT block is illegal, so a label or
17235 barrier terminates the block. */
17236 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17239 body = PATTERN (insn);
17240 /* USE and CLOBBER aren't really insns, so just skip them. */
17241 if (GET_CODE (body) == USE
17242 || GET_CODE (body) == CLOBBER)
17245 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17246 if (GET_CODE (body) != COND_EXEC)
17248 /* Allow up to 4 conditionally executed instructions in a block. */
17249 n = get_attr_ce_count (insn);
17250 if (arm_condexec_masklen + n > 4)
17253 predicate = COND_EXEC_TEST (body);
17254 code = get_arm_condition_code (predicate);
17255 mask = (1 << n) - 1;
17256 if (arm_current_cc == code)
17257 arm_condexec_mask |= (mask << arm_condexec_masklen);
17258 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17261 arm_condexec_count++;
17262 arm_condexec_masklen += n;
17264 /* A jump must be the last instruction in a conditional block. */
17265 if (GET_CODE(insn) == JUMP_INSN)
17268 /* Restore recog_data (getting the attributes of other insns can
17269 destroy this array, but final.c assumes that it remains intact
17270 across this call). */
17271 extract_constrain_insn_cached (first_insn);
17275 arm_final_prescan_insn (rtx insn)
17277 /* BODY will hold the body of INSN. */
17278 rtx body = PATTERN (insn);
17280 /* This will be 1 if trying to repeat the trick, and things need to be
17281 reversed if it appears to fail. */
17284 /* If we start with a return insn, we only succeed if we find another one. */
17285 int seeking_return = 0;
17287 /* START_INSN will hold the insn from where we start looking. This is the
17288 first insn after the following code_label if REVERSE is true. */
17289 rtx start_insn = insn;
17291 /* If in state 4, check if the target branch is reached, in order to
17292 change back to state 0. */
17293 if (arm_ccfsm_state == 4)
17295 if (insn == arm_target_insn)
17297 arm_target_insn = NULL;
17298 arm_ccfsm_state = 0;
17303 /* If in state 3, it is possible to repeat the trick, if this insn is an
17304 unconditional branch to a label, and immediately following this branch
17305 is the previous target label which is only used once, and the label this
17306 branch jumps to is not too far off. */
17307 if (arm_ccfsm_state == 3)
17309 if (simplejump_p (insn))
17311 start_insn = next_nonnote_insn (start_insn);
17312 if (GET_CODE (start_insn) == BARRIER)
17314 /* XXX Isn't this always a barrier? */
17315 start_insn = next_nonnote_insn (start_insn);
17317 if (GET_CODE (start_insn) == CODE_LABEL
17318 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17319 && LABEL_NUSES (start_insn) == 1)
17324 else if (GET_CODE (body) == RETURN)
17326 start_insn = next_nonnote_insn (start_insn);
17327 if (GET_CODE (start_insn) == BARRIER)
17328 start_insn = next_nonnote_insn (start_insn);
17329 if (GET_CODE (start_insn) == CODE_LABEL
17330 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17331 && LABEL_NUSES (start_insn) == 1)
17334 seeking_return = 1;
17343 gcc_assert (!arm_ccfsm_state || reverse);
17344 if (GET_CODE (insn) != JUMP_INSN)
17347 /* This jump might be paralleled with a clobber of the condition codes;
17348 the jump should always come first. */
17349 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17350 body = XVECEXP (body, 0, 0);
17353 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17354 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17357 int fail = FALSE, succeed = FALSE;
17358 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17359 int then_not_else = TRUE;
17360 rtx this_insn = start_insn, label = 0;
17362 /* Register the insn jumped to. */
17365 if (!seeking_return)
17366 label = XEXP (SET_SRC (body), 0);
17368 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17369 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17370 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17372 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17373 then_not_else = FALSE;
17375 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17376 seeking_return = 1;
17377 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17379 seeking_return = 1;
17380 then_not_else = FALSE;
17383 gcc_unreachable ();
17385 /* See how many insns this branch skips, and what kind of insns. If all
17386 insns are okay, and the label or unconditional branch to the same
17387 label is not too far away, succeed. */
17388 for (insns_skipped = 0;
17389 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17393 this_insn = next_nonnote_insn (this_insn);
17397 switch (GET_CODE (this_insn))
17400 /* Succeed if it is the target label, otherwise fail since
17401 control falls in from somewhere else. */
17402 if (this_insn == label)
17404 arm_ccfsm_state = 1;
17412 /* Succeed if the following insn is the target label.
17414 If return insns are used then the last insn in a function
17415 will be a barrier. */
17416 this_insn = next_nonnote_insn (this_insn);
17417 if (this_insn && this_insn == label)
17419 arm_ccfsm_state = 1;
17427 /* The AAPCS says that conditional calls should not be
17428 used since they make interworking inefficient (the
17429 linker can't transform BL<cond> into BLX). That's
17430 only a problem if the machine has BLX. */
17437 /* Succeed if the following insn is the target label, or
17438 if the following two insns are a barrier and the
17439 target label. */
17440 this_insn = next_nonnote_insn (this_insn);
17441 if (this_insn && GET_CODE (this_insn) == BARRIER)
17442 this_insn = next_nonnote_insn (this_insn);
17444 if (this_insn && this_insn == label
17445 && insns_skipped < max_insns_skipped)
17447 arm_ccfsm_state = 1;
17455 /* If this is an unconditional branch to the same label, succeed.
17456 If it is to another label, do nothing. If it is conditional,
17457 fail. */
17458 /* XXX Probably, the tests for SET and the PC are
17459 unnecessary. */
17461 scanbody = PATTERN (this_insn);
17462 if (GET_CODE (scanbody) == SET
17463 && GET_CODE (SET_DEST (scanbody)) == PC)
17465 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17466 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17468 arm_ccfsm_state = 2;
17471 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17474 /* Fail if a conditional return is undesirable (e.g. on a
17475 StrongARM), but still allow this if optimizing for size. */
17476 else if (GET_CODE (scanbody) == RETURN
17477 && !use_return_insn (TRUE, NULL)
17480 else if (GET_CODE (scanbody) == RETURN
17483 arm_ccfsm_state = 2;
17486 else if (GET_CODE (scanbody) == PARALLEL)
17488 switch (get_attr_conds (this_insn))
17498 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17503 /* Instructions using or affecting the condition codes make it
17504 fail. */
17505 scanbody = PATTERN (this_insn);
17506 if (!(GET_CODE (scanbody) == SET
17507 || GET_CODE (scanbody) == PARALLEL)
17508 || get_attr_conds (this_insn) != CONDS_NOCOND)
17511 /* A conditional Cirrus instruction must be followed by
17512 a non-Cirrus instruction. However, since we
17513 conditionalize instructions in this function and by
17514 the time we get here we can't add instructions
17515 (nops), because shorten_branches() has already been
17516 called, we will disable conditionalizing Cirrus
17517 instructions to be safe. */
17518 if (GET_CODE (scanbody) != USE
17519 && GET_CODE (scanbody) != CLOBBER
17520 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17530 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17531 arm_target_label = CODE_LABEL_NUMBER (label);
17534 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17536 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17538 this_insn = next_nonnote_insn (this_insn);
17539 gcc_assert (!this_insn
17540 || (GET_CODE (this_insn) != BARRIER
17541 && GET_CODE (this_insn) != CODE_LABEL));
17545 /* Oh, dear! We ran off the end... give up. */
17546 extract_constrain_insn_cached (insn);
17547 arm_ccfsm_state = 0;
17548 arm_target_insn = NULL;
17551 arm_target_insn = this_insn;
17554 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17555 what it was. */
17557 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17559 if (reverse || then_not_else)
17560 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17563 /* Restore recog_data (getting the attributes of other insns can
17564 destroy this array, but final.c assumes that it remains intact
17565 across this call). */
17566 extract_constrain_insn_cached (insn);
17570 /* Output IT instructions. */
17572 thumb2_asm_output_opcode (FILE * stream)
17577 if (arm_condexec_mask)
17579 for (n = 0; n < arm_condexec_masklen; n++)
17580 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17582 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17583 arm_condition_codes[arm_current_cc]);
17584 arm_condexec_mask = 0;
17588 /* Returns true if REGNO is a valid register
17589 for holding a quantity of type MODE. */
17591 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17593 if (GET_MODE_CLASS (mode) == MODE_CC)
17594 return (regno == CC_REGNUM
17595 || (TARGET_HARD_FLOAT && TARGET_VFP
17596 && regno == VFPCC_REGNUM));
17599 /* For the Thumb we only allow values bigger than SImode in
17600 registers 0 - 6, so that there is always a second low
17601 register available to hold the upper part of the value.
17602 We probably ought to ensure that the register is the
17603 start of an even numbered register pair. */
17604 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17606 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17607 && IS_CIRRUS_REGNUM (regno))
17608 /* We have outlawed SI values in Cirrus registers because they
17609 reside in the lower 32 bits, but SF values reside in the
17610 upper 32 bits. This causes gcc all sorts of grief. We can't
17611 even split the registers into pairs because Cirrus SI values
17612 get sign-extended to 64 bits -- aldyh. */
17613 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17615 if (TARGET_HARD_FLOAT && TARGET_VFP
17616 && IS_VFP_REGNUM (regno))
17618 if (mode == SFmode || mode == SImode)
17619 return VFP_REGNO_OK_FOR_SINGLE (regno);
17621 if (mode == DFmode)
17622 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17624 /* VFP registers can hold HFmode values, but there is no point in
17625 putting them there unless we have hardware conversion insns. */
17626 if (mode == HFmode)
17627 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17630 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17631 || (VALID_NEON_QREG_MODE (mode)
17632 && NEON_REGNO_OK_FOR_QUAD (regno))
17633 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17634 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17635 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17636 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17637 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17642 if (TARGET_REALLY_IWMMXT)
17644 if (IS_IWMMXT_GR_REGNUM (regno))
17645 return mode == SImode;
17647 if (IS_IWMMXT_REGNUM (regno))
17648 return VALID_IWMMXT_REG_MODE (mode);
17651 /* We allow almost any value to be stored in the general registers.
17652 Restrict doubleword quantities to even register pairs so that we can
17653 use ldrd. Do not allow very large Neon structure opaque modes in
17654 general registers; they would use too many. */
17655 if (regno <= LAST_ARM_REGNUM)
17656 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17657 && ARM_NUM_REGS (mode) <= 4;
17659 if (regno == FRAME_POINTER_REGNUM
17660 || regno == ARG_POINTER_REGNUM)
17661 /* We only allow integers in the fake hard registers. */
17662 return GET_MODE_CLASS (mode) == MODE_INT;
17664 /* The only registers left are the FPA registers
17665 which we only allow to hold FP values. */
17666 return (TARGET_HARD_FLOAT && TARGET_FPA
17667 && GET_MODE_CLASS (mode) == MODE_FLOAT
17668 && regno >= FIRST_FPA_REGNUM
17669 && regno <= LAST_FPA_REGNUM);
17670 }
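/* Illustrative consequences of the rules above (not from the original
   source): DImode is allowed in {r4, r5} but, when TARGET_LDRD is set,
   not in {r3, r4}, since doubleword values must then start on an even
   core register; SFmode fits any single-precision VFP register, while
   DFmode fits only those passing VFP_REGNO_OK_FOR_DOUBLE.  */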
17672 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17673 not used in arm mode. */
17676 arm_regno_class (int regno)
17680 if (regno == STACK_POINTER_REGNUM)
17682 if (regno == CC_REGNUM)
17689 if (TARGET_THUMB2 && regno < 8)
17692 if ( regno <= LAST_ARM_REGNUM
17693 || regno == FRAME_POINTER_REGNUM
17694 || regno == ARG_POINTER_REGNUM)
17695 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17697 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17698 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17700 if (IS_CIRRUS_REGNUM (regno))
17701 return CIRRUS_REGS;
17703 if (IS_VFP_REGNUM (regno))
17705 if (regno <= D7_VFP_REGNUM)
17706 return VFP_D0_D7_REGS;
17707 else if (regno <= LAST_LO_VFP_REGNUM)
17708 return VFP_LO_REGS;
17710 return VFP_HI_REGS;
17713 if (IS_IWMMXT_REGNUM (regno))
17714 return IWMMXT_REGS;
17716 if (IS_IWMMXT_GR_REGNUM (regno))
17717 return IWMMXT_GR_REGS;
17722 /* Handle a special case when computing the offset
17723 of an argument from the frame pointer. */
17725 arm_debugger_arg_offset (int value, rtx addr)
17729 /* We are only interested if dbxout_parms() failed to compute the offset. */
17733 /* We can only cope with the case where the address is held in a register. */
17734 if (GET_CODE (addr) != REG)
17737 /* If we are using the frame pointer to point at the argument, then
17738 an offset of 0 is correct. */
17739 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17742 /* If we are using the stack pointer to point at the
17743 argument, then an offset of 0 is correct. */
17744 /* ??? Check this is consistent with thumb2 frame layout. */
17745 if ((TARGET_THUMB || !frame_pointer_needed)
17746 && REGNO (addr) == SP_REGNUM)
17749 /* Oh dear. The argument is pointed to by a register rather
17750 than being held in a register, or being stored at a known
17751 offset from the frame pointer. Since GDB only understands
17752 those two kinds of argument we must translate the address
17753 held in the register into an offset from the frame pointer.
17754 We do this by searching through the insns for the function
17755 looking to see where this register gets its value. If the
17756 register is initialized from the frame pointer plus an offset
17757 then we are in luck and we can continue, otherwise we give up.
17759 This code is exercised by producing debugging information
17760 for a function with arguments like this:
17762 double func (double a, double b, int c, double d) {return d;}
17764 Without this code the stab for parameter 'd' will be set to
17765 an offset of 0 from the frame pointer, rather than 8. */
17767 /* The if() statement says:
17769 If the insn is a normal instruction
17770 and if the insn is setting the value in a register
17771 and if the register being set is the register holding the address of the argument
17772 and if the address is computed by an addition
17773 that involves adding to a register
17774 which is the frame pointer
17775 a constant integer
17777 then... */
17779 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17781 if ( GET_CODE (insn) == INSN
17782 && GET_CODE (PATTERN (insn)) == SET
17783 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17784 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17785 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17786 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17787 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17790 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17799 warning (0, "unable to compute real location of stacked parameter");
17800 value = 8; /* XXX magic hack */
17820 T_MAX /* Size of enum. Keep last. */
17821 } neon_builtin_type_mode;
17823 #define TYPE_MODE_BIT(X) (1 << (X))
17825 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17826 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17827 | TYPE_MODE_BIT (T_DI))
17828 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17829 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17830 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17832 #define v8qi_UP T_V8QI
17833 #define v4hi_UP T_V4HI
17834 #define v2si_UP T_V2SI
17835 #define v2sf_UP T_V2SF
17837 #define v16qi_UP T_V16QI
17838 #define v8hi_UP T_V8HI
17839 #define v4si_UP T_V4SI
17840 #define v4sf_UP T_V4SF
17841 #define v2di_UP T_V2DI
17846 #define UP(X) X##_UP
17879 NEON_LOADSTRUCTLANE,
17881 NEON_STORESTRUCTLANE,
17890 const neon_itype itype;
17891 const neon_builtin_type_mode mode;
17892 const enum insn_code code;
17893 unsigned int fcode;
17894 } neon_builtin_datum;
17896 #define CF(N,X) CODE_FOR_neon_##N##X
17898 #define VAR1(T, N, A) \
17899 {#N, NEON_##T, UP (A), CF (N, A), 0}
17900 #define VAR2(T, N, A, B) \
17901 VAR1 (T, N, A), \
17902 {#N, NEON_##T, UP (B), CF (N, B), 0}
17903 #define VAR3(T, N, A, B, C) \
17904 VAR2 (T, N, A, B), \
17905 {#N, NEON_##T, UP (C), CF (N, C), 0}
17906 #define VAR4(T, N, A, B, C, D) \
17907 VAR3 (T, N, A, B, C), \
17908 {#N, NEON_##T, UP (D), CF (N, D), 0}
17909 #define VAR5(T, N, A, B, C, D, E) \
17910 VAR4 (T, N, A, B, C, D), \
17911 {#N, NEON_##T, UP (E), CF (N, E), 0}
17912 #define VAR6(T, N, A, B, C, D, E, F) \
17913 VAR5 (T, N, A, B, C, D, E), \
17914 {#N, NEON_##T, UP (F), CF (N, F), 0}
17915 #define VAR7(T, N, A, B, C, D, E, F, G) \
17916 VAR6 (T, N, A, B, C, D, E, F), \
17917 {#N, NEON_##T, UP (G), CF (N, G), 0}
17918 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17919 VAR7 (T, N, A, B, C, D, E, F, G), \
17920 {#N, NEON_##T, UP (H), CF (N, H), 0}
17921 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17922 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17923 {#N, NEON_##T, UP (I), CF (N, I), 0}
17924 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17925 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17926 {#N, NEON_##T, UP (J), CF (N, J), 0}
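/* For example (illustrative), VAR2 (TERNOP, vqdmlal, v4hi, v2si) expands
   to the two table entries

     {"vqdmlal", NEON_TERNOP, T_V4HI, CODE_FOR_neon_vqdmlalv4hi, 0},
     {"vqdmlal", NEON_TERNOP, T_V2SI, CODE_FOR_neon_vqdmlalv2si, 0},

   via the UP and CF helper macros above.  */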
17928 /* The mode entries in the following table correspond to the "key" type of the
17929 instruction variant, i.e. equivalent to that which would be specified after
17930 the assembler mnemonic, which usually refers to the last vector operand.
17931 (Signed/unsigned/polynomial types are not differentiated between though, and
17932 are all mapped onto the same mode for a given element size.) The modes
17933 listed per instruction should be the same as those defined for that
17934 instruction's pattern in neon.md. */
static neon_builtin_datum neon_builtin_data[] =
{
17938 VAR10 (BINOP, vadd,
17939 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17940 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17941 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17942 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17943 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17944 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17945 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17946 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17947 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17948 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17949 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17950 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17951 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17952 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17953 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17954 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17955 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17956 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17957 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17958 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17959 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17960 VAR2 (BINOP, vqdmull, v4hi, v2si),
17961 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17962 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17963 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17964 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17965 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17966 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17967 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17968 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17969 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17970 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17971 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17972 VAR10 (BINOP, vsub,
17973 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17974 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17975 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17976 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17977 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17978 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17979 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17980 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17981 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17982 VAR2 (BINOP, vcage, v2sf, v4sf),
17983 VAR2 (BINOP, vcagt, v2sf, v4sf),
17984 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17985 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17986 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17987 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17988 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17989 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17990 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17991 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17992 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17993 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17994 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17995 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17996 VAR2 (BINOP, vrecps, v2sf, v4sf),
17997 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17998 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17999 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18000 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18001 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18002 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18003 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18004 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18005 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18006 VAR2 (UNOP, vcnt, v8qi, v16qi),
18007 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18008 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18009 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18010 /* FIXME: vget_lane supports more variants than this! */
18011 VAR10 (GETLANE, vget_lane,
18012 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18013 VAR10 (SETLANE, vset_lane,
18014 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18015 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18016 VAR10 (DUP, vdup_n,
18017 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18018 VAR10 (DUPLANE, vdup_lane,
18019 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18020 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18021 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18022 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18023 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18024 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18025 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18026 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18027 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18028 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18029 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18030 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18031 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18032 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18033 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18034 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18035 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18036 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18037 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18038 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18039 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18040 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18041 VAR10 (BINOP, vext,
18042 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18043 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18044 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18045 VAR2 (UNOP, vrev16, v8qi, v16qi),
18046 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18047 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18048 VAR10 (SELECT, vbsl,
18049 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18050 VAR1 (VTBL, vtbl1, v8qi),
18051 VAR1 (VTBL, vtbl2, v8qi),
18052 VAR1 (VTBL, vtbl3, v8qi),
18053 VAR1 (VTBL, vtbl4, v8qi),
18054 VAR1 (VTBX, vtbx1, v8qi),
18055 VAR1 (VTBX, vtbx2, v8qi),
18056 VAR1 (VTBX, vtbx3, v8qi),
18057 VAR1 (VTBX, vtbx4, v8qi),
18058 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18059 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18060 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18061 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18062 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18063 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18064 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18065 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18066 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18067 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18068 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18069 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18070 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18071 VAR10 (LOAD1, vld1,
18072 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18073 VAR10 (LOAD1LANE, vld1_lane,
18074 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18075 VAR10 (LOAD1, vld1_dup,
18076 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18077 VAR10 (STORE1, vst1,
18078 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18079 VAR10 (STORE1LANE, vst1_lane,
18080 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
VAR9 (LOADSTRUCT,
	 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18083 VAR7 (LOADSTRUCTLANE, vld2_lane,
18084 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18085 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18086 VAR9 (STORESTRUCT, vst2,
18087 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18088 VAR7 (STORESTRUCTLANE, vst2_lane,
18089 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
VAR9 (LOADSTRUCT,
	 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18092 VAR7 (LOADSTRUCTLANE, vld3_lane,
18093 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18094 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18095 VAR9 (STORESTRUCT, vst3,
18096 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18097 VAR7 (STORESTRUCTLANE, vst3_lane,
18098 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18099 VAR9 (LOADSTRUCT, vld4,
18100 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18101 VAR7 (LOADSTRUCTLANE, vld4_lane,
18102 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18103 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18104 VAR9 (STORESTRUCT, vst4,
18105 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18106 VAR7 (STORESTRUCTLANE, vst4_lane,
18107 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18108 VAR10 (LOGICBINOP, vand,
18109 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18110 VAR10 (LOGICBINOP, vorr,
18111 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18112 VAR10 (BINOP, veor,
18113 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18114 VAR10 (LOGICBINOP, vbic,
18115 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18116 VAR10 (LOGICBINOP, vorn,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
};
18132 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
symbolic names defined here (which would require too much duplication).  */
enum arm_builtins
{
18137 ARM_BUILTIN_GETWCX,
18138 ARM_BUILTIN_SETWCX,
18142 ARM_BUILTIN_WAVG2BR,
18143 ARM_BUILTIN_WAVG2HR,
18144 ARM_BUILTIN_WAVG2B,
18145 ARM_BUILTIN_WAVG2H,
18152 ARM_BUILTIN_WMACSZ,
18154 ARM_BUILTIN_WMACUZ,
18157 ARM_BUILTIN_WSADBZ,
18159 ARM_BUILTIN_WSADHZ,
18161 ARM_BUILTIN_WALIGN,
18164 ARM_BUILTIN_TMIAPH,
18165 ARM_BUILTIN_TMIABB,
18166 ARM_BUILTIN_TMIABT,
18167 ARM_BUILTIN_TMIATB,
18168 ARM_BUILTIN_TMIATT,
18170 ARM_BUILTIN_TMOVMSKB,
18171 ARM_BUILTIN_TMOVMSKH,
18172 ARM_BUILTIN_TMOVMSKW,
18174 ARM_BUILTIN_TBCSTB,
18175 ARM_BUILTIN_TBCSTH,
18176 ARM_BUILTIN_TBCSTW,
18178 ARM_BUILTIN_WMADDS,
18179 ARM_BUILTIN_WMADDU,
18181 ARM_BUILTIN_WPACKHSS,
18182 ARM_BUILTIN_WPACKWSS,
18183 ARM_BUILTIN_WPACKDSS,
18184 ARM_BUILTIN_WPACKHUS,
18185 ARM_BUILTIN_WPACKWUS,
18186 ARM_BUILTIN_WPACKDUS,
18191 ARM_BUILTIN_WADDSSB,
18192 ARM_BUILTIN_WADDSSH,
18193 ARM_BUILTIN_WADDSSW,
18194 ARM_BUILTIN_WADDUSB,
18195 ARM_BUILTIN_WADDUSH,
18196 ARM_BUILTIN_WADDUSW,
18200 ARM_BUILTIN_WSUBSSB,
18201 ARM_BUILTIN_WSUBSSH,
18202 ARM_BUILTIN_WSUBSSW,
18203 ARM_BUILTIN_WSUBUSB,
18204 ARM_BUILTIN_WSUBUSH,
18205 ARM_BUILTIN_WSUBUSW,
18212 ARM_BUILTIN_WCMPEQB,
18213 ARM_BUILTIN_WCMPEQH,
18214 ARM_BUILTIN_WCMPEQW,
18215 ARM_BUILTIN_WCMPGTUB,
18216 ARM_BUILTIN_WCMPGTUH,
18217 ARM_BUILTIN_WCMPGTUW,
18218 ARM_BUILTIN_WCMPGTSB,
18219 ARM_BUILTIN_WCMPGTSH,
18220 ARM_BUILTIN_WCMPGTSW,
18222 ARM_BUILTIN_TEXTRMSB,
18223 ARM_BUILTIN_TEXTRMSH,
18224 ARM_BUILTIN_TEXTRMSW,
18225 ARM_BUILTIN_TEXTRMUB,
18226 ARM_BUILTIN_TEXTRMUH,
18227 ARM_BUILTIN_TEXTRMUW,
18228 ARM_BUILTIN_TINSRB,
18229 ARM_BUILTIN_TINSRH,
18230 ARM_BUILTIN_TINSRW,
18232 ARM_BUILTIN_WMAXSW,
18233 ARM_BUILTIN_WMAXSH,
18234 ARM_BUILTIN_WMAXSB,
18235 ARM_BUILTIN_WMAXUW,
18236 ARM_BUILTIN_WMAXUH,
18237 ARM_BUILTIN_WMAXUB,
18238 ARM_BUILTIN_WMINSW,
18239 ARM_BUILTIN_WMINSH,
18240 ARM_BUILTIN_WMINSB,
18241 ARM_BUILTIN_WMINUW,
18242 ARM_BUILTIN_WMINUH,
18243 ARM_BUILTIN_WMINUB,
18245 ARM_BUILTIN_WMULUM,
18246 ARM_BUILTIN_WMULSM,
18247 ARM_BUILTIN_WMULUL,
18249 ARM_BUILTIN_PSADBH,
18250 ARM_BUILTIN_WSHUFH,
18264 ARM_BUILTIN_WSLLHI,
18265 ARM_BUILTIN_WSLLWI,
18266 ARM_BUILTIN_WSLLDI,
18267 ARM_BUILTIN_WSRAHI,
18268 ARM_BUILTIN_WSRAWI,
18269 ARM_BUILTIN_WSRADI,
18270 ARM_BUILTIN_WSRLHI,
18271 ARM_BUILTIN_WSRLWI,
18272 ARM_BUILTIN_WSRLDI,
18273 ARM_BUILTIN_WRORHI,
18274 ARM_BUILTIN_WRORWI,
18275 ARM_BUILTIN_WRORDI,
18277 ARM_BUILTIN_WUNPCKIHB,
18278 ARM_BUILTIN_WUNPCKIHH,
18279 ARM_BUILTIN_WUNPCKIHW,
18280 ARM_BUILTIN_WUNPCKILB,
18281 ARM_BUILTIN_WUNPCKILH,
18282 ARM_BUILTIN_WUNPCKILW,
18284 ARM_BUILTIN_WUNPCKEHSB,
18285 ARM_BUILTIN_WUNPCKEHSH,
18286 ARM_BUILTIN_WUNPCKEHSW,
18287 ARM_BUILTIN_WUNPCKEHUB,
18288 ARM_BUILTIN_WUNPCKEHUH,
18289 ARM_BUILTIN_WUNPCKEHUW,
18290 ARM_BUILTIN_WUNPCKELSB,
18291 ARM_BUILTIN_WUNPCKELSH,
18292 ARM_BUILTIN_WUNPCKELSW,
18293 ARM_BUILTIN_WUNPCKELUB,
18294 ARM_BUILTIN_WUNPCKELUH,
18295 ARM_BUILTIN_WUNPCKELUW,
18297 ARM_BUILTIN_THREAD_POINTER,
18299 ARM_BUILTIN_NEON_BASE,
ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
};
18304 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode;
  tree decl;
18312 tree neon_intQI_type_node;
18313 tree neon_intHI_type_node;
18314 tree neon_polyQI_type_node;
18315 tree neon_polyHI_type_node;
18316 tree neon_intSI_type_node;
18317 tree neon_intDI_type_node;
18318 tree neon_float_type_node;
18320 tree intQI_pointer_node;
18321 tree intHI_pointer_node;
18322 tree intSI_pointer_node;
18323 tree intDI_pointer_node;
18324 tree float_pointer_node;
18326 tree const_intQI_node;
18327 tree const_intHI_node;
18328 tree const_intSI_node;
18329 tree const_intDI_node;
18330 tree const_float_node;
18332 tree const_intQI_pointer_node;
18333 tree const_intHI_pointer_node;
18334 tree const_intSI_pointer_node;
18335 tree const_intDI_pointer_node;
18336 tree const_float_pointer_node;
18338 tree V8QI_type_node;
18339 tree V4HI_type_node;
18340 tree V2SI_type_node;
18341 tree V2SF_type_node;
18342 tree V16QI_type_node;
18343 tree V8HI_type_node;
18344 tree V4SI_type_node;
18345 tree V4SF_type_node;
18346 tree V2DI_type_node;
18348 tree intUQI_type_node;
18349 tree intUHI_type_node;
18350 tree intUSI_type_node;
18351 tree intUDI_type_node;
18353 tree intEI_type_node;
18354 tree intOI_type_node;
18355 tree intCI_type_node;
18356 tree intXI_type_node;
18358 tree V8QI_pointer_node;
18359 tree V4HI_pointer_node;
18360 tree V2SI_pointer_node;
18361 tree V2SF_pointer_node;
18362 tree V16QI_pointer_node;
18363 tree V8HI_pointer_node;
18364 tree V4SI_pointer_node;
18365 tree V4SF_pointer_node;
18366 tree V2DI_pointer_node;
18368 tree void_ftype_pv8qi_v8qi_v8qi;
18369 tree void_ftype_pv4hi_v4hi_v4hi;
18370 tree void_ftype_pv2si_v2si_v2si;
18371 tree void_ftype_pv2sf_v2sf_v2sf;
18372 tree void_ftype_pdi_di_di;
18373 tree void_ftype_pv16qi_v16qi_v16qi;
18374 tree void_ftype_pv8hi_v8hi_v8hi;
18375 tree void_ftype_pv4si_v4si_v4si;
18376 tree void_ftype_pv4sf_v4sf_v4sf;
18377 tree void_ftype_pv2di_v2di_v2di;
18379 tree reinterp_ftype_dreg[5][5];
18380 tree reinterp_ftype_qreg[5][5];
18381 tree dreg_types[5], qreg_types[5];
18383 /* Create distinguished type nodes for NEON vector element types,
18384 and pointers to values of such types, so we can detect them later. */
18385 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18386 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18387 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18388 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18389 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18390 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18391 neon_float_type_node = make_node (REAL_TYPE);
18392 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18393 layout_type (neon_float_type_node);
18395 /* Define typedefs which exactly correspond to the modes we are basing vector
18396 types on. If you change these names you'll need to change
18397 the table used by arm_mangle_type too. */
18398 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18399 "__builtin_neon_qi");
18400 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18401 "__builtin_neon_hi");
18402 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18403 "__builtin_neon_si");
18404 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18405 "__builtin_neon_sf");
18406 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18407 "__builtin_neon_di");
18408 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18409 "__builtin_neon_poly8");
18410 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18411 "__builtin_neon_poly16");
18413 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18414 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18415 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18416 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18417 float_pointer_node = build_pointer_type (neon_float_type_node);
18419 /* Next create constant-qualified versions of the above types. */
const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);
18431 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18432 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18433 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18434 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18435 const_float_pointer_node = build_pointer_type (const_float_node);
18437 /* Now create vector types based on our NEON element types. */
/* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18459 /* Unsigned integer types for various mode sizes. */
18460 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18461 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18462 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18463 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18465 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18466 "__builtin_neon_uqi");
18467 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18468 "__builtin_neon_uhi");
18469 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18470 "__builtin_neon_usi");
18471 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18472 "__builtin_neon_udi");
18474 /* Opaque integer types for structures of vectors. */
18475 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18476 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18477 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18478 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
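/* (For reference: EImode, OImode, CImode and XImode are 24-, 32-, 48- and
   64-byte opaque integer modes, sized to hold the register blocks used by
   the vld2/vld3/vld4 family of structure loads and stores.)  */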
18480 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18481 "__builtin_neon_ti");
18482 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18483 "__builtin_neon_ei");
18484 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18485 "__builtin_neon_oi");
18486 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18487 "__builtin_neon_ci");
18488 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18489 "__builtin_neon_xi");
18491 /* Pointers to vector types. */
18492 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18493 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18494 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18495 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18496 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18497 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18498 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18499 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18500 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18502 /* Operations which return results as pairs. */
18503 void_ftype_pv8qi_v8qi_v8qi =
18504 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18505 V8QI_type_node, NULL);
18506 void_ftype_pv4hi_v4hi_v4hi =
18507 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18508 V4HI_type_node, NULL);
18509 void_ftype_pv2si_v2si_v2si =
18510 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18511 V2SI_type_node, NULL);
18512 void_ftype_pv2sf_v2sf_v2sf =
18513 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18514 V2SF_type_node, NULL);
18515 void_ftype_pdi_di_di =
18516 build_function_type_list (void_type_node, intDI_pointer_node,
18517 neon_intDI_type_node, neon_intDI_type_node, NULL);
18518 void_ftype_pv16qi_v16qi_v16qi =
18519 build_function_type_list (void_type_node, V16QI_pointer_node,
18520 V16QI_type_node, V16QI_type_node, NULL);
18521 void_ftype_pv8hi_v8hi_v8hi =
18522 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18523 V8HI_type_node, NULL);
18524 void_ftype_pv4si_v4si_v4si =
18525 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18526 V4SI_type_node, NULL);
18527 void_ftype_pv4sf_v4sf_v4sf =
18528 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18529 V4SF_type_node, NULL);
18530 void_ftype_pv2di_v2di_v2di =
18531 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18532 V2DI_type_node, NULL);
18534 dreg_types[0] = V8QI_type_node;
18535 dreg_types[1] = V4HI_type_node;
18536 dreg_types[2] = V2SI_type_node;
18537 dreg_types[3] = V2SF_type_node;
18538 dreg_types[4] = neon_intDI_type_node;
18540 qreg_types[0] = V16QI_type_node;
18541 qreg_types[1] = V8HI_type_node;
18542 qreg_types[2] = V4SI_type_node;
18543 qreg_types[3] = V4SF_type_node;
18544 qreg_types[4] = V2DI_type_node;
for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
18564 const char* const modenames[] = {
18565 "v8qi", "v4hi", "v2si", "v2sf", "di",
18566 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18571 int is_load = 0, is_store = 0;
18573 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18580 case NEON_LOAD1LANE:
18581 case NEON_LOADSTRUCT:
18582 case NEON_LOADSTRUCTLANE:
18584 /* Fall through. */
18586 case NEON_STORE1LANE:
18587 case NEON_STORESTRUCT:
18588 case NEON_STORESTRUCTLANE:
18591 /* Fall through. */
18594 case NEON_LOGICBINOP:
18595 case NEON_SHIFTINSERT:
18602 case NEON_SHIFTIMM:
18603 case NEON_SHIFTACC:
18609 case NEON_LANEMULL:
18610 case NEON_LANEMULH:
18612 case NEON_SCALARMUL:
18613 case NEON_SCALARMULL:
18614 case NEON_SCALARMULH:
18615 case NEON_SCALARMAC:
18621 tree return_type = void_type_node, args = void_list_node;
18623 /* Build a function type directly from the insn_data for
18624 this builtin. The build_function_type() function takes
18625 care of removing duplicates for us. */
18626 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18630 if (is_load && k == 1)
18632 /* Neon load patterns always have the memory
18633 operand in the operand 1 position. */
18634 gcc_assert (insn_data[d->code].operand[k].predicate
18635 == neon_struct_operand);
18641 eltype = const_intQI_pointer_node;
18646 eltype = const_intHI_pointer_node;
18651 eltype = const_intSI_pointer_node;
18656 eltype = const_float_pointer_node;
18661 eltype = const_intDI_pointer_node;
18664 default: gcc_unreachable ();
18667 else if (is_store && k == 0)
18669 /* Similarly, Neon store patterns use operand 0 as
18670 the memory location to store to. */
18671 gcc_assert (insn_data[d->code].operand[k].predicate
18672 == neon_struct_operand);
18678 eltype = intQI_pointer_node;
18683 eltype = intHI_pointer_node;
18688 eltype = intSI_pointer_node;
18693 eltype = float_pointer_node;
18698 eltype = intDI_pointer_node;
18701 default: gcc_unreachable ();
18706 switch (insn_data[d->code].operand[k].mode)
18708 case VOIDmode: eltype = void_type_node; break;
18710 case QImode: eltype = neon_intQI_type_node; break;
18711 case HImode: eltype = neon_intHI_type_node; break;
18712 case SImode: eltype = neon_intSI_type_node; break;
18713 case SFmode: eltype = neon_float_type_node; break;
18714 case DImode: eltype = neon_intDI_type_node; break;
18715 case TImode: eltype = intTI_type_node; break;
18716 case EImode: eltype = intEI_type_node; break;
18717 case OImode: eltype = intOI_type_node; break;
18718 case CImode: eltype = intCI_type_node; break;
18719 case XImode: eltype = intXI_type_node; break;
18720 /* 64-bit vectors. */
18721 case V8QImode: eltype = V8QI_type_node; break;
18722 case V4HImode: eltype = V4HI_type_node; break;
18723 case V2SImode: eltype = V2SI_type_node; break;
18724 case V2SFmode: eltype = V2SF_type_node; break;
18725 /* 128-bit vectors. */
18726 case V16QImode: eltype = V16QI_type_node; break;
18727 case V8HImode: eltype = V8HI_type_node; break;
18728 case V4SImode: eltype = V4SI_type_node; break;
18729 case V4SFmode: eltype = V4SF_type_node; break;
18730 case V2DImode: eltype = V2DI_type_node; break;
18731 default: gcc_unreachable ();
18735 if (k == 0 && !is_store)
return_type = eltype;
	      else
		args = tree_cons (NULL_TREE, eltype, args);
	    }

	  ftype = build_function_type (return_type, args);
	}
	break;
18745 case NEON_RESULTPAIR:
18747 switch (insn_data[d->code].operand[1].mode)
18749 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18750 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18751 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18752 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18753 case DImode: ftype = void_ftype_pdi_di_di; break;
18754 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18755 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18756 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18757 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18758 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18759 default: gcc_unreachable ();
18764 case NEON_REINTERP:
{
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  */
	    int rhs = d->mode % 5;
18769 switch (insn_data[d->code].operand[0].mode)
18771 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18772 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18773 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18774 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18775 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18776 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18777 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18778 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18779 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18780 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18781 default: gcc_unreachable ();
18787 gcc_unreachable ();
gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
  do \
    { \
      if ((MASK) & insn_flags) \
	{ \
	  tree bdecl; \
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
					BUILT_IN_MD, NULL, NULL_TREE); \
	  arm_builtin_decls[CODE] = bdecl; \
	} \
    } \
  while (0)
struct builtin_description
{
  const unsigned int mask;
18816 const enum insn_code icode;
18817 const char * const name;
18818 const enum arm_builtins code;
18819 const enum rtx_code comparison;
const unsigned int flag;
};

static const struct builtin_description bdesc_2arg[] =
{
18825 #define IWMMXT_BUILTIN(code, string, builtin) \
18826 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18827 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18829 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
18830 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
18831 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
18832 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
18833 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
18834 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
18835 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
18836 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
18837 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
18838 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
18839 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
18840 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
18841 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
18842 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
18843 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
18844 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
18845 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
18846 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
18847 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
18848 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
18849 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
18850 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
18851 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
18852 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
18853 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
18854 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
18855 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
18856 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
18857 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
18858 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
18859 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
18860 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
18861 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
18862 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
18863 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
18864 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
18865 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
18866 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
18867 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
18868 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
18869 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
18870 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
18871 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
18872 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
18873 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
18874 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
18875 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
18876 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
18877 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
18878 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
18879 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
18880 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
18881 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
18882 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
18883 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
18884 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
18885 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
18886 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
18888 #define IWMMXT_BUILTIN2(code, builtin) \
18889 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18891 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
18892 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
18893 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
18894 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
18895 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
18896 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
18897 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
18898 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
18899 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
18900 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
18901 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
18902 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
18903 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
18904 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
18905 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
18906 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
18907 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
18908 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
18909 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
18910 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
18911 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
18912 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
18913 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
18914 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
18915 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
18916 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
18917 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
18918 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
18919 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
18920 IWMMXT_BUILTIN2 (rordi3, WRORDI)
18921 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
18922 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
18928 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
18929 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
18930 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
18931 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
18932 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
18933 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
18934 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
18935 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
18936 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
18937 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
18938 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
18939 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
18940 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
18941 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
18942 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
18943 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
18947 /* Set up all the iWMMXt builtins. This is not called if
18948 TARGET_IWMMXT is zero. */
static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;
18957 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18958 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18959 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
tree int_ftype_int
    = build_function_type (integer_type_node,
18963 tree_cons (NULL_TREE, integer_type_node, endlink));
18964 tree v8qi_ftype_v8qi_v8qi_int
18965 = build_function_type (V8QI_type_node,
18966 tree_cons (NULL_TREE, V8QI_type_node,
18967 tree_cons (NULL_TREE, V8QI_type_node,
tree_cons (NULL_TREE,
						 integer_type_node,
						 endlink))));
18971 tree v4hi_ftype_v4hi_int
18972 = build_function_type (V4HI_type_node,
18973 tree_cons (NULL_TREE, V4HI_type_node,
18974 tree_cons (NULL_TREE, integer_type_node,
18976 tree v2si_ftype_v2si_int
18977 = build_function_type (V2SI_type_node,
18978 tree_cons (NULL_TREE, V2SI_type_node,
18979 tree_cons (NULL_TREE, integer_type_node,
18981 tree v2si_ftype_di_di
18982 = build_function_type (V2SI_type_node,
18983 tree_cons (NULL_TREE, long_long_integer_type_node,
18984 tree_cons (NULL_TREE,
18985 long_long_integer_type_node,
18987 tree di_ftype_di_int
18988 = build_function_type (long_long_integer_type_node,
18989 tree_cons (NULL_TREE, long_long_integer_type_node,
18990 tree_cons (NULL_TREE, integer_type_node,
18992 tree di_ftype_di_int_int
18993 = build_function_type (long_long_integer_type_node,
18994 tree_cons (NULL_TREE, long_long_integer_type_node,
18995 tree_cons (NULL_TREE, integer_type_node,
18996 tree_cons (NULL_TREE,
18999 tree int_ftype_v8qi
19000 = build_function_type (integer_type_node,
19001 tree_cons (NULL_TREE, V8QI_type_node,
19003 tree int_ftype_v4hi
19004 = build_function_type (integer_type_node,
19005 tree_cons (NULL_TREE, V4HI_type_node,
19007 tree int_ftype_v2si
19008 = build_function_type (integer_type_node,
19009 tree_cons (NULL_TREE, V2SI_type_node,
19011 tree int_ftype_v8qi_int
19012 = build_function_type (integer_type_node,
19013 tree_cons (NULL_TREE, V8QI_type_node,
19014 tree_cons (NULL_TREE, integer_type_node,
19016 tree int_ftype_v4hi_int
19017 = build_function_type (integer_type_node,
19018 tree_cons (NULL_TREE, V4HI_type_node,
19019 tree_cons (NULL_TREE, integer_type_node,
19021 tree int_ftype_v2si_int
19022 = build_function_type (integer_type_node,
19023 tree_cons (NULL_TREE, V2SI_type_node,
19024 tree_cons (NULL_TREE, integer_type_node,
19026 tree v8qi_ftype_v8qi_int_int
19027 = build_function_type (V8QI_type_node,
19028 tree_cons (NULL_TREE, V8QI_type_node,
19029 tree_cons (NULL_TREE, integer_type_node,
19030 tree_cons (NULL_TREE,
19033 tree v4hi_ftype_v4hi_int_int
19034 = build_function_type (V4HI_type_node,
19035 tree_cons (NULL_TREE, V4HI_type_node,
19036 tree_cons (NULL_TREE, integer_type_node,
19037 tree_cons (NULL_TREE,
19040 tree v2si_ftype_v2si_int_int
19041 = build_function_type (V2SI_type_node,
19042 tree_cons (NULL_TREE, V2SI_type_node,
19043 tree_cons (NULL_TREE, integer_type_node,
19044 tree_cons (NULL_TREE,
19047 /* Miscellaneous. */
19048 tree v8qi_ftype_v4hi_v4hi
19049 = build_function_type (V8QI_type_node,
19050 tree_cons (NULL_TREE, V4HI_type_node,
19051 tree_cons (NULL_TREE, V4HI_type_node,
19053 tree v4hi_ftype_v2si_v2si
19054 = build_function_type (V4HI_type_node,
19055 tree_cons (NULL_TREE, V2SI_type_node,
19056 tree_cons (NULL_TREE, V2SI_type_node,
19058 tree v2si_ftype_v4hi_v4hi
19059 = build_function_type (V2SI_type_node,
19060 tree_cons (NULL_TREE, V4HI_type_node,
19061 tree_cons (NULL_TREE, V4HI_type_node,
19063 tree v2si_ftype_v8qi_v8qi
19064 = build_function_type (V2SI_type_node,
19065 tree_cons (NULL_TREE, V8QI_type_node,
19066 tree_cons (NULL_TREE, V8QI_type_node,
19068 tree v4hi_ftype_v4hi_di
19069 = build_function_type (V4HI_type_node,
19070 tree_cons (NULL_TREE, V4HI_type_node,
19071 tree_cons (NULL_TREE,
19072 long_long_integer_type_node,
19074 tree v2si_ftype_v2si_di
19075 = build_function_type (V2SI_type_node,
19076 tree_cons (NULL_TREE, V2SI_type_node,
19077 tree_cons (NULL_TREE,
19078 long_long_integer_type_node,
19080 tree void_ftype_int_int
19081 = build_function_type (void_type_node,
19082 tree_cons (NULL_TREE, integer_type_node,
19083 tree_cons (NULL_TREE, integer_type_node,
endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
19099 tree v2si_ftype_v4hi
19100 = build_function_type (V2SI_type_node,
19101 tree_cons (NULL_TREE, V4HI_type_node,
19103 tree v4hi_ftype_v8qi
19104 = build_function_type (V4HI_type_node,
19105 tree_cons (NULL_TREE, V8QI_type_node,
19108 tree di_ftype_di_v4hi_v4hi
19109 = build_function_type (long_long_unsigned_type_node,
19110 tree_cons (NULL_TREE,
19111 long_long_unsigned_type_node,
19112 tree_cons (NULL_TREE, V4HI_type_node,
19113 tree_cons (NULL_TREE,
19117 tree di_ftype_v4hi_v4hi
19118 = build_function_type (long_long_unsigned_type_node,
19119 tree_cons (NULL_TREE, V4HI_type_node,
19120 tree_cons (NULL_TREE, V4HI_type_node,
19123 /* Normal vector binops. */
19124 tree v8qi_ftype_v8qi_v8qi
19125 = build_function_type (V8QI_type_node,
19126 tree_cons (NULL_TREE, V8QI_type_node,
19127 tree_cons (NULL_TREE, V8QI_type_node,
19129 tree v4hi_ftype_v4hi_v4hi
19130 = build_function_type (V4HI_type_node,
19131 tree_cons (NULL_TREE, V4HI_type_node,
19132 tree_cons (NULL_TREE, V4HI_type_node,
19134 tree v2si_ftype_v2si_v2si
19135 = build_function_type (V2SI_type_node,
19136 tree_cons (NULL_TREE, V2SI_type_node,
19137 tree_cons (NULL_TREE, V2SI_type_node,
19139 tree di_ftype_di_di
19140 = build_function_type (long_long_unsigned_type_node,
19141 tree_cons (NULL_TREE, long_long_unsigned_type_node,
19142 tree_cons (NULL_TREE,
19143 long_long_unsigned_type_node,
19146 /* Add all builtins that are more or less simple operations on two
19148 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19150 /* Use one of the operands; the target can have a different mode for
19151 mask-generating compares. */
19152 enum machine_mode mode;
19158 mode = insn_data[d->icode].operand[1].mode;
19163 type = v8qi_ftype_v8qi_v8qi;
19166 type = v4hi_ftype_v4hi_v4hi;
19169 type = v2si_ftype_v2si_v2si;
19172 type = di_ftype_di_di;
19176 gcc_unreachable ();
19179 def_mbuiltin (d->mask, d->name, type, d->code);
19182 /* Add the remaining MMX insns with somewhat more complicated types. */
19183 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19184 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19185 ARM_BUILTIN_ ## CODE)
19187 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19188 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19189 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19191 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19192 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19193 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19194 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19195 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19196 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19198 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19199 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19200 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19201 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19202 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19203 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19205 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19206 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19207 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19208 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19209 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19210 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19212 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19213 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19214 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19215 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19216 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19217 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19219 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19221 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19222 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19223 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19224 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19226 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19227 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19228 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19229 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19230 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19231 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19232 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19233 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19234 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19236 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19237 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19238 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19240 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19241 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19242 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19244 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19245 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19246 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19247 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19248 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19249 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19251 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19252 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19253 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19254 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19255 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19256 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19257 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19258 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19259 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19260 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19261 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19262 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19264 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19265 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19266 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19267 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19269 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19270 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19271 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19272 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19273 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19274 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19275 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19277 #undef iwmmx_mbuiltin
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
19286 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
19289 TREE_NOTHROW (decl) = 1;
19290 TREE_READONLY (decl) = 1;
arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
19298 TYPE_PRECISION (fp16_type) = 16;
19299 layout_type (fp16_type);
(*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();
19308 if (TARGET_REALLY_IWMMXT)
19309 arm_init_iwmmxt_builtins ();
if (TARGET_NEON)
    arm_init_neon_builtins ();
19314 if (arm_fp16_format)
arm_init_fp16_builtins ();
}
19318 /* Return the ARM builtin for CODE. */
static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
19324 return error_mark_node;
return arm_builtin_decls[code];
}
19329 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19331 static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */
19341 static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
19349 /* Implement TARGET_PROMOTED_TYPE. */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion
   to float.  */
static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
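/* A source-level illustration of the __fp16 hooks above (hypothetical
   user code, not part of this file):

     __fp16 h = 1.0;
     double d = h;        // converted as (double)(float)h
     float  f = h + h;    // operands promoted to float first

   arm_convert_to_type supplies the intermediate float step for the
   __fp16 <-> double case, and arm_promoted_type promotes __fp16 values
   to float in arithmetic.  */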
19376 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19377 This simply adds HFmode as a supported mode; even though we don't
19378 implement arithmetic on this type directly, it's supported by
19379 optabs conversions, much the way the double-word arithmetic is
19380 special-cased in the default hook. */
static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
19386 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19388 return default_scalar_mode_supported_p (mode);
19391 /* Errors in the source file can cause expand_expr to return const0_rtx
19392 where we expect a vector. To avoid crashing, use one of the vector
19393 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
19407 /* Subroutine of arm_expand_builtin to take care of binop insns. */
static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
19414 tree arg0 = CALL_EXPR_ARG (exp, 0);
19415 tree arg1 = CALL_EXPR_ARG (exp, 1);
19416 rtx op0 = expand_normal (arg0);
19417 rtx op1 = expand_normal (arg1);
19418 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19419 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19420 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19422 if (VECTOR_MODE_P (mode0))
19423 op0 = safe_vector_operand (op0, mode0);
19424 if (VECTOR_MODE_P (mode1))
19425 op1 = safe_vector_operand (op1, mode1);
if (! target
      || GET_MODE (target) != tmode
19429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19430 target = gen_reg_rtx (tmode);
19432 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19434 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19435 op0 = copy_to_mode_reg (mode0, op0);
19436 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19437 op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
19446 /* Subroutine of arm_expand_builtin to take care of unop insns. */
static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
19453 tree arg0 = CALL_EXPR_ARG (exp, 0);
19454 rtx op0 = expand_normal (arg0);
19455 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19456 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
if (! target
      || GET_MODE (target) != tmode
19460 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19461 target = gen_reg_rtx (tmode);
if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }
pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;
19487 #define NEON_MAX_BUILTIN_ARGS 5
19489 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19490 and return an expression for the accessed memory.
19492 The intrinsic function operates on a block of registers that has
19493 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19494 The function references the memory at EXP in mode MEM_MODE;
19495 this mode may be BLKmode if no more suitable mode is available. */
19498 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19499 enum machine_mode reg_mode,
19500 neon_builtin_type_mode type_mode)
19502 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19503 tree elem_type, upper_bound, array_type;
19505 /* Work out the size of the register block in bytes. */
19506 reg_size = GET_MODE_SIZE (reg_mode);
19508 /* Work out the size of each vector in bytes. */
19509 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19510 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19512 /* Work out how many vectors there are. */
19513 gcc_assert (reg_size % vector_size == 0);
19514 nvectors = reg_size / vector_size;
19516 /* Work out how many elements are being loaded or stored.
19517 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19518 and memory elements; anything else implies a lane load or store. */
19519 if (mem_mode == reg_mode)
19520 nelems = vector_size * nvectors;
19524 /* Work out the type of each element. */
19525 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19526 elem_type = TREE_TYPE (TREE_TYPE (exp));
19528 /* Create a type that describes the full access. */
19529 upper_bound = build_int_cst (size_type_node, nelems - 1);
19530 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19532 /* Dereference EXP using that type. */
19533 exp = convert (build_pointer_type (array_type), exp);
19534 return fold_build2 (MEM_REF, array_type, exp,
build_int_cst (TREE_TYPE (exp), 0));
}
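/* Worked example (hypothetical values): for a full-width vld1 of V8QImode
   data, mem_mode == reg_mode == V8QImode gives reg_size = 8, vector_size
   = 8 (T_V8QI is a D-register mode) and nvectors = 1, so nelems = 8; a
   pointer of type "const __builtin_neon_qi *" is then recast to point to
   an 8-element array of that type and dereferenced with a zero-offset
   MEM_REF.  */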
19538 /* Expand a Neon builtin. */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
19546 tree arg[NEON_MAX_BUILTIN_ARGS];
19547 rtx op[NEON_MAX_BUILTIN_ARGS];
19548 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19549 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19550 enum machine_mode other_mode;
int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
19557 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19558 target = gen_reg_rtx (tmode);
va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
19570 opno = argc + have_retval;
19571 mode[argc] = insn_data[icode].operand[opno].mode;
19572 arg[argc] = CALL_EXPR_ARG (exp, argc);
19573 if (thisarg == NEON_ARG_MEMORY)
19575 other_mode = insn_data[icode].operand[1 - opno].mode;
19576 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19577 other_mode, type_mode);
19579 op[argc] = expand_normal (arg[argc]);
19583 case NEON_ARG_COPY_TO_REG:
19584 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19585 if (!(*insn_data[icode].operand[opno].predicate)
19586 (op[argc], mode[argc]))
19587 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19590 case NEON_ARG_CONSTANT:
19591 /* FIXME: This error message is somewhat unhelpful. */
19592 if (!(*insn_data[icode].operand[opno].predicate)
19593 (op[argc], mode[argc]))
19594 error ("argument must be a constant");
19597 case NEON_ARG_MEMORY:
19598 gcc_assert (MEM_P (op[argc]));
19599 PUT_MODE (op[argc], mode[argc]);
19600 /* ??? arm_neon.h uses the same built-in functions for signed
and unsigned accesses, casting where necessary.  This isn't
	       alias safe.  */
19603 set_mem_alias_set (op[argc], 0);
19604 if (!(*insn_data[icode].operand[opno].predicate)
19605 (op[argc], mode[argc]))
19606 op[argc] = (replace_equiv_address
19607 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19610 case NEON_ARG_STOP:
19611 gcc_unreachable ();
19624 pat = GEN_FCN (icode) (target, op[0]);
19628 pat = GEN_FCN (icode) (target, op[0], op[1]);
19632 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19636 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19640 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19644 gcc_unreachable ();
19650 pat = GEN_FCN (icode) (op[0]);
19654 pat = GEN_FCN (icode) (op[0], op[1]);
19658 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19662 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19666 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19670 gcc_unreachable ();
19681 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19682 constants defined per-instruction or per instruction-variant. Instead, the
19683 required info is looked up in the table neon_builtin_data. */
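/* For instance (illustrative), the builtin registered for "vadd" in V8QI
   mode was assigned fcode ARM_BUILTIN_NEON_BASE plus its index in
   neon_builtin_data, so the subtraction below recovers the table entry
   whose itype is NEON_BINOP and whose icode is CODE_FOR_neon_vaddv8qi.  */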
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19688 neon_itype itype = d->itype;
19689 enum insn_code icode = d->code;
19690 neon_builtin_type_mode type_mode = d->mode;
19697 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19698 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19702 case NEON_SCALARMUL:
19703 case NEON_SCALARMULL:
19704 case NEON_SCALARMULH:
19705 case NEON_SHIFTINSERT:
19706 case NEON_LOGICBINOP:
19707 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19708 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19712 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19713 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19714 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19718 case NEON_SHIFTIMM:
19719 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19720 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19724 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19725 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19729 case NEON_REINTERP:
19730 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19731 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19735 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19736 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19738 case NEON_RESULTPAIR:
19739 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19740 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19744 case NEON_LANEMULL:
19745 case NEON_LANEMULH:
19746 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19747 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19748 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19751 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19752 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19753 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19755 case NEON_SHIFTACC:
19756 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19757 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19758 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19760 case NEON_SCALARMAC:
19761 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19762 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19763 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19767 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19768 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19772 case NEON_LOADSTRUCT:
19773 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19774 NEON_ARG_MEMORY, NEON_ARG_STOP);
19776 case NEON_LOAD1LANE:
19777 case NEON_LOADSTRUCTLANE:
19778 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19779 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19783 case NEON_STORESTRUCT:
19784 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19785 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19787 case NEON_STORE1LANE:
19788 case NEON_STORESTRUCTLANE:
19789 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19790 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19794 gcc_unreachable ();
19797 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19799 neon_reinterpret (rtx dest, rtx src)
19801 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
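/* For instance, vreinterpretq_s32_f32 amounts to nothing more than a
   register-to-register move (and typically not even that, once the
   register allocator coalesces DEST and SRC): gen_lowpart simply
   re-reads SRC's bits in DEST's mode. */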
19804 /* Emit code to place a Neon pair result in memory locations (with equal
19805 registers). */
19807 neon_emit_pair_result_insn (enum machine_mode mode,
19808 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19811 rtx mem = gen_rtx_MEM (mode, destaddr);
19812 rtx tmp1 = gen_reg_rtx (mode);
19813 rtx tmp2 = gen_reg_rtx (mode);
19815 emit_insn (intfn (tmp1, op1, op2, tmp2));
19817 emit_move_insn (mem, tmp1);
19818 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19819 emit_move_insn (mem, tmp2);
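/* A vtrn/vzip/vuzp-style expansion is a typical caller: INTFN computes
   two vector results into TMP1 and TMP2, which are then stored
   back-to-back at DESTADDR and DESTADDR + GET_MODE_SIZE (MODE). */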
19822 /* Set up operands for a register copy from src to dest, taking care not to
19823 clobber registers in the process.
19824 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19825 be called with a large N, so that should be OK. */
19828 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19830 unsigned int copied = 0, opctr = 0;
19831 unsigned int done = (1 << count) - 1;
19834 while (copied != done)
19836 for (i = 0; i < count; i++)
19840 for (j = 0; good && j < count; j++)
19841 if (i != j && (copied & (1 << j)) == 0
19842 && reg_overlap_mentioned_p (src[j], dest[i]))
19847 operands[opctr++] = dest[i];
19848 operands[opctr++] = src[i];
19854 gcc_assert (opctr == count * 2);
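/* Worked example: COUNT == 2, DEST == {d1, d2}, SRC == {d2, d3}. The
   first pass emits d1 := d2 (d1 overlaps no still-pending source) and
   then d2 := d3, so d2 is read before it is overwritten. A cyclic set
   such as DEST == {d0, d1}, SRC == {d1, d0} would never terminate, so
   callers are presumably expected not to create cycles. */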
19857 /* Expand an expression EXP that calls a built-in function,
19858 with result going to TARGET if that's convenient
19859 (and in mode MODE if that's convenient).
19860 SUBTARGET may be used as the target for computing one of EXP's operands.
19861 IGNORE is nonzero if the value is to be ignored. */
19864 arm_expand_builtin (tree exp,
19866 rtx subtarget ATTRIBUTE_UNUSED,
19867 enum machine_mode mode ATTRIBUTE_UNUSED,
19868 int ignore ATTRIBUTE_UNUSED)
19870 const struct builtin_description * d;
19871 enum insn_code icode;
19872 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19880 int fcode = DECL_FUNCTION_CODE (fndecl);
19882 enum machine_mode tmode;
19883 enum machine_mode mode0;
19884 enum machine_mode mode1;
19885 enum machine_mode mode2;
19887 if (fcode >= ARM_BUILTIN_NEON_BASE)
19888 return arm_expand_neon_builtin (fcode, exp, target);
19892 case ARM_BUILTIN_TEXTRMSB:
19893 case ARM_BUILTIN_TEXTRMUB:
19894 case ARM_BUILTIN_TEXTRMSH:
19895 case ARM_BUILTIN_TEXTRMUH:
19896 case ARM_BUILTIN_TEXTRMSW:
19897 case ARM_BUILTIN_TEXTRMUW:
19898 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19899 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19900 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19901 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19902 : CODE_FOR_iwmmxt_textrmw);
19904 arg0 = CALL_EXPR_ARG (exp, 0);
19905 arg1 = CALL_EXPR_ARG (exp, 1);
19906 op0 = expand_normal (arg0);
19907 op1 = expand_normal (arg1);
19908 tmode = insn_data[icode].operand[0].mode;
19909 mode0 = insn_data[icode].operand[1].mode;
19910 mode1 = insn_data[icode].operand[2].mode;
19912 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19913 op0 = copy_to_mode_reg (mode0, op0);
19914 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19916 /* @@@ better error message */
19917 error ("selector must be an immediate");
19918 return gen_reg_rtx (tmode);
19921 || GET_MODE (target) != tmode
19922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19923 target = gen_reg_rtx (tmode);
19924 pat = GEN_FCN (icode) (target, op0, op1);
19930 case ARM_BUILTIN_TINSRB:
19931 case ARM_BUILTIN_TINSRH:
19932 case ARM_BUILTIN_TINSRW:
19933 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19934 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19935 : CODE_FOR_iwmmxt_tinsrw);
19936 arg0 = CALL_EXPR_ARG (exp, 0);
19937 arg1 = CALL_EXPR_ARG (exp, 1);
19938 arg2 = CALL_EXPR_ARG (exp, 2);
19939 op0 = expand_normal (arg0);
19940 op1 = expand_normal (arg1);
19941 op2 = expand_normal (arg2);
19942 tmode = insn_data[icode].operand[0].mode;
19943 mode0 = insn_data[icode].operand[1].mode;
19944 mode1 = insn_data[icode].operand[2].mode;
19945 mode2 = insn_data[icode].operand[3].mode;
19947 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19948 op0 = copy_to_mode_reg (mode0, op0);
19949 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19950 op1 = copy_to_mode_reg (mode1, op1);
19951 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19953 /* @@@ better error message */
19954 error ("selector must be an immediate");
19958 || GET_MODE (target) != tmode
19959 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19960 target = gen_reg_rtx (tmode);
19961 pat = GEN_FCN (icode) (target, op0, op1, op2);
19967 case ARM_BUILTIN_SETWCX:
19968 arg0 = CALL_EXPR_ARG (exp, 0);
19969 arg1 = CALL_EXPR_ARG (exp, 1);
19970 op0 = force_reg (SImode, expand_normal (arg0));
19971 op1 = expand_normal (arg1);
19972 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19975 case ARM_BUILTIN_GETWCX:
19976 arg0 = CALL_EXPR_ARG (exp, 0);
19977 op0 = expand_normal (arg0);
19978 target = gen_reg_rtx (SImode);
19979 emit_insn (gen_iwmmxt_tmrc (target, op0));
19982 case ARM_BUILTIN_WSHUFH:
19983 icode = CODE_FOR_iwmmxt_wshufh;
19984 arg0 = CALL_EXPR_ARG (exp, 0);
19985 arg1 = CALL_EXPR_ARG (exp, 1);
19986 op0 = expand_normal (arg0);
19987 op1 = expand_normal (arg1);
19988 tmode = insn_data[icode].operand[0].mode;
19989 mode1 = insn_data[icode].operand[1].mode;
19990 mode2 = insn_data[icode].operand[2].mode;
19992 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19993 op0 = copy_to_mode_reg (mode1, op0);
19994 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19996 /* @@@ better error message */
19997 error ("mask must be an immediate");
20001 || GET_MODE (target) != tmode
20002 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20003 target = gen_reg_rtx (tmode);
20004 pat = GEN_FCN (icode) (target, op0, op1);
20010 case ARM_BUILTIN_WSADB:
20011 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20012 case ARM_BUILTIN_WSADH:
20013 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20014 case ARM_BUILTIN_WSADBZ:
20015 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20016 case ARM_BUILTIN_WSADHZ:
20017 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20019 /* Several three-argument builtins. */
20020 case ARM_BUILTIN_WMACS:
20021 case ARM_BUILTIN_WMACU:
20022 case ARM_BUILTIN_WALIGN:
20023 case ARM_BUILTIN_TMIA:
20024 case ARM_BUILTIN_TMIAPH:
20025 case ARM_BUILTIN_TMIATT:
20026 case ARM_BUILTIN_TMIATB:
20027 case ARM_BUILTIN_TMIABT:
20028 case ARM_BUILTIN_TMIABB:
20029 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20030 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20031 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20032 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20033 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20034 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20035 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20036 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20037 : CODE_FOR_iwmmxt_walign);
20038 arg0 = CALL_EXPR_ARG (exp, 0);
20039 arg1 = CALL_EXPR_ARG (exp, 1);
20040 arg2 = CALL_EXPR_ARG (exp, 2);
20041 op0 = expand_normal (arg0);
20042 op1 = expand_normal (arg1);
20043 op2 = expand_normal (arg2);
20044 tmode = insn_data[icode].operand[0].mode;
20045 mode0 = insn_data[icode].operand[1].mode;
20046 mode1 = insn_data[icode].operand[2].mode;
20047 mode2 = insn_data[icode].operand[3].mode;
20049 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20050 op0 = copy_to_mode_reg (mode0, op0);
20051 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20052 op1 = copy_to_mode_reg (mode1, op1);
20053 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20054 op2 = copy_to_mode_reg (mode2, op2);
20056 || GET_MODE (target) != tmode
20057 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20058 target = gen_reg_rtx (tmode);
20059 pat = GEN_FCN (icode) (target, op0, op1, op2);
20065 case ARM_BUILTIN_WZERO:
20066 target = gen_reg_rtx (DImode);
20067 emit_insn (gen_iwmmxt_clrdi (target));
20070 case ARM_BUILTIN_THREAD_POINTER:
20071 return arm_load_tp (target);
20077 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20078 if (d->code == (const enum arm_builtins) fcode)
20079 return arm_expand_binop_builtin (d->icode, exp, target);
20081 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20082 if (d->code == (const enum arm_builtins) fcode)
20083 return arm_expand_unop_builtin (d->icode, exp, target, 0);
20085 /* @@@ Should really do something sensible here. */
20089 /* Return the number (counting from 0) of
20090 the least significant set bit in MASK. */
20093 number_of_first_bit_set (unsigned mask)
20097 for (bit = 0;
20098 (mask & (1 << bit)) == 0;
20099 ++bit)
20100 continue;
20102 return bit;
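/* E.g. number_of_first_bit_set (0x14) is 2, since bit 2 is the lowest
   bit set in binary 10100. MASK must be nonzero, or the loop above
   runs off the end of the word. */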
20105 /* Emit code to push or pop registers to or from the stack. F is the
20106 assembly file. MASK is the registers to push or pop. PUSH is
20107 nonzero if we should push, and zero if we should pop. For debugging
20108 output, if pushing, adjust CFA_OFFSET by the amount of space added
20109 to the stack. REAL_REGS should have the same number of bits set as
20110 MASK, and will be used instead (in the same order) to describe which
20111 registers were saved - this is used to mark the save slots when we
20112 push high registers after moving them to low registers. */
20114 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
20115 unsigned long real_regs)
20118 int lo_mask = mask & 0xFF;
20119 int pushed_words = 0;
20123 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
20125 /* Special case. Do not generate a POP PC statement here, do it in
20126 thumb_exit instead. */
20127 thumb_exit (f, -1);
20131 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
20133 fprintf (f, "\t.save\t{");
20134 for (regno = 0; regno < 15; regno++)
20136 if (real_regs & (1 << regno))
20138 if (real_regs & ((1 << regno) -1))
20140 asm_fprintf (f, "%r", regno);
20143 fprintf (f, "}\n");
20146 fprintf (f, "\t%s\t{", push ? "push" : "pop");
20148 /* Look at the low registers first. */
20149 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
20153 asm_fprintf (f, "%r", regno);
20155 if ((lo_mask & ~1) != 0)
20162 if (push && (mask & (1 << LR_REGNUM)))
20164 /* Catch pushing the LR. */
20168 asm_fprintf (f, "%r", LR_REGNUM);
20172 else if (!push && (mask & (1 << PC_REGNUM)))
20174 /* Catch popping the PC. */
20175 if (TARGET_INTERWORK || TARGET_BACKTRACE
20176 || crtl->calls_eh_return)
20178 /* The PC is never popped directly; instead
20179 it is popped into r3 and then BX is used. */
20180 fprintf (f, "}\n");
20182 thumb_exit (f, -1);
20191 asm_fprintf (f, "%r", PC_REGNUM);
20195 fprintf (f, "}\n");
20197 if (push && pushed_words && dwarf2out_do_frame ())
20199 char *l = dwarf2out_cfi_label (false);
20200 int pushed_mask = real_regs;
20202 *cfa_offset += pushed_words * 4;
20203 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
20206 pushed_mask = real_regs;
20207 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
20209 if (pushed_mask & 1)
20210 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
20215 /* Generate code to return from a thumb function.
20216 If 'reg_containing_return_addr' is -1, then the return address is
20217 actually on the stack, at the stack pointer. */
20219 thumb_exit (FILE *f, int reg_containing_return_addr)
20221 unsigned regs_available_for_popping;
20222 unsigned regs_to_pop;
20224 unsigned available;
20228 int restore_a4 = FALSE;
20230 /* Compute the registers we need to pop. */
20234 if (reg_containing_return_addr == -1)
20236 regs_to_pop |= 1 << LR_REGNUM;
20240 if (TARGET_BACKTRACE)
20242 /* Restore the (ARM) frame pointer and stack pointer. */
20243 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20247 /* If there is nothing to pop then just emit the BX instruction and
20249 if (pops_needed == 0)
20251 if (crtl->calls_eh_return)
20252 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20254 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20257 /* Otherwise if we are not supporting interworking and we have not created
20258 a backtrace structure and the function was not entered in ARM mode then
20259 just pop the return address straight into the PC. */
20260 else if (!TARGET_INTERWORK
20261 && !TARGET_BACKTRACE
20262 && !is_called_in_ARM_mode (current_function_decl)
20263 && !crtl->calls_eh_return)
20265 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20269 /* Find out how many of the (return) argument registers we can corrupt. */
20270 regs_available_for_popping = 0;
20272 /* If returning via __builtin_eh_return, the bottom three registers
20273 all contain information needed for the return. */
20274 if (crtl->calls_eh_return)
20278 /* Deduce the registers used from the function's
20279 return value. This is more reliable than examining
20280 df_regs_ever_live_p () because that will be set if the register is
20281 ever used in the function, not just if the register is used
20282 to hold a return value. */
20284 if (crtl->return_rtx != 0)
20285 mode = GET_MODE (crtl->return_rtx);
20287 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20289 size = GET_MODE_SIZE (mode);
20293 /* In a void function we can use any argument register.
20294 In a function that returns a structure on the stack
20295 we can use the second and third argument registers. */
20296 if (mode == VOIDmode)
20297 regs_available_for_popping =
20298 (1 << ARG_REGISTER (1))
20299 | (1 << ARG_REGISTER (2))
20300 | (1 << ARG_REGISTER (3));
20302 regs_available_for_popping =
20303 (1 << ARG_REGISTER (2))
20304 | (1 << ARG_REGISTER (3));
20306 else if (size <= 4)
20307 regs_available_for_popping =
20308 (1 << ARG_REGISTER (2))
20309 | (1 << ARG_REGISTER (3));
20310 else if (size <= 8)
20311 regs_available_for_popping =
20312 (1 << ARG_REGISTER (3));
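/* Concretely: a void function may clobber r0-r2; one returning a
   single word (or returning in memory) keeps r0 live, leaving r1 and
   r2; one returning a doubleword occupies r0 and r1, leaving only
   r2. */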
20315 /* Match registers to be popped with registers into which we pop them. */
20316 for (available = regs_available_for_popping,
20317 required = regs_to_pop;
20318 required != 0 && available != 0;
20319 available &= ~(available & - available),
20320 required &= ~(required & - required))
20323 /* If we have any popping registers left over, remove them. */
20325 regs_available_for_popping &= ~available;
20327 /* Otherwise if we need another popping register we can use
20328 the fourth argument register. */
20329 else if (pops_needed)
20331 /* If we have not found any free argument registers and
20332 reg a4 contains the return address, we must move it. */
20333 if (regs_available_for_popping == 0
20334 && reg_containing_return_addr == LAST_ARG_REGNUM)
20336 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20337 reg_containing_return_addr = LR_REGNUM;
20339 else if (size > 12)
20341 /* Register a4 is being used to hold part of the return value,
20342 but we have dire need of a free, low register. */
20345 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20348 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20350 /* The fourth argument register is available. */
20351 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20357 /* Pop as many registers as we can. */
20358 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20359 regs_available_for_popping);
20361 /* Process the registers we popped. */
20362 if (reg_containing_return_addr == -1)
20364 /* The return address was popped into the lowest numbered register. */
20365 regs_to_pop &= ~(1 << LR_REGNUM);
20367 reg_containing_return_addr =
20368 number_of_first_bit_set (regs_available_for_popping);
20370 /* Remove this register from the mask of available registers, so that
20371 the return address will not be corrupted by further pops. */
20372 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20375 /* If we popped other registers then handle them here. */
20376 if (regs_available_for_popping)
20380 /* Work out which register currently contains the frame pointer. */
20381 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20383 /* Move it into the correct place. */
20384 asm_fprintf (f, "\tmov\t%r, %r\n",
20385 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20387 /* (Temporarily) remove it from the mask of popped registers. */
20388 regs_available_for_popping &= ~(1 << frame_pointer);
20389 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20391 if (regs_available_for_popping)
20395 /* We popped the stack pointer as well,
20396 find the register that contains it. */
20397 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20399 /* Move it into the stack register. */
20400 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20402 /* At this point we have popped all necessary registers, so
20403 do not worry about restoring regs_available_for_popping
20404 to its correct value:
20406 assert (pops_needed == 0)
20407 assert (regs_available_for_popping == (1 << frame_pointer))
20408 assert (regs_to_pop == (1 << STACK_POINTER)) */
20412 /* Since we have just moved the popped value into the frame
20413 pointer, the popping register is available for reuse, and
20414 we know that we still have the stack pointer left to pop. */
20415 regs_available_for_popping |= (1 << frame_pointer);
20419 /* If we still have registers left on the stack, but we no longer have
20420 any registers into which we can pop them, then we must move the return
20421 address into the link register and make available the register that
20422 contained it. */
20423 if (regs_available_for_popping == 0 && pops_needed > 0)
20425 regs_available_for_popping |= 1 << reg_containing_return_addr;
20427 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20428 reg_containing_return_addr);
20430 reg_containing_return_addr = LR_REGNUM;
20433 /* If we have registers left on the stack then pop some more.
20434 We know that at most we will want to pop FP and SP. */
20435 if (pops_needed > 0)
20440 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20441 regs_available_for_popping);
20443 /* We have popped either FP or SP.
20444 Move whichever one it is into the correct register. */
20445 popped_into = number_of_first_bit_set (regs_available_for_popping);
20446 move_to = number_of_first_bit_set (regs_to_pop);
20448 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20450 regs_to_pop &= ~(1 << move_to);
20455 /* If we still have not popped everything then we must have only
20456 had one register available to us and we are now popping the SP. */
20457 if (pops_needed > 0)
20461 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20462 regs_available_for_popping);
20464 popped_into = number_of_first_bit_set (regs_available_for_popping);
20466 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20467 /*
20468 assert (regs_to_pop == (1 << STACK_POINTER))
20469 assert (pops_needed == 1)
20470 */
20473 /* If necessary restore the a4 register. */
20476 if (reg_containing_return_addr != LR_REGNUM)
20478 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20479 reg_containing_return_addr = LR_REGNUM;
20482 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20485 if (crtl->calls_eh_return)
20486 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20488 /* Return to caller. */
20489 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
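/* One possible sequence, for a function returning a word and compiled
   for interworking (no backtrace, no eh_return): r0 is live with the
   return value, so the return address is popped into the next free
   argument register and we branch through it:

	pop	{r1}
	bx	r1
*/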
20492 /* Scan INSN just before assembler is output for it.
20493 For Thumb-1, we track the status of the condition codes; this
20494 information is used in the cbranchsi4_insn pattern. */
20496 thumb1_final_prescan_insn (rtx insn)
20498 if (flag_print_asm_name)
20499 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20500 INSN_ADDRESSES (INSN_UID (insn)));
20501 /* Don't overwrite the previous setter when we get to a cbranch. */
20502 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20504 enum attr_conds conds;
20506 if (cfun->machine->thumb1_cc_insn)
20508 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20509 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20512 conds = get_attr_conds (insn);
20513 if (conds == CONDS_SET)
20515 rtx set = single_set (insn);
20516 cfun->machine->thumb1_cc_insn = insn;
20517 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20518 cfun->machine->thumb1_cc_op1 = const0_rtx;
20519 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20520 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20522 rtx src1 = XEXP (SET_SRC (set), 1);
20523 if (src1 == const0_rtx)
20524 cfun->machine->thumb1_cc_mode = CCmode;
20527 else if (conds != CONDS_NOCOND)
20528 cfun->machine->thumb1_cc_insn = NULL_RTX;
20533 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20535 unsigned HOST_WIDE_INT mask = 0xff;
20538 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20539 if (val == 0) /* XXX */
20540 return 0;
20542 for (i = 0; i < 25; i++)
20543 if ((val & (mask << i)) == val)
20544 return 1;
20546 return 0;
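/* Examples: 0x1FE0 (0xFF << 5) is shiftable, while 0x101 is not: its
   set bits span nine positions, which cannot fit in a single 8-bit
   window. */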
20549 /* Returns nonzero if the current function contains,
20550 or might contain a far jump. */
20552 thumb_far_jump_used_p (void)
20556 /* This test is only important for leaf functions. */
20557 /* assert (!leaf_function_p ()); */
20559 /* If we have already decided that far jumps may be used,
20560 do not bother checking again, and always return true even if
20561 it turns out that they are not being used. Once we have made
20562 the decision that far jumps are present (and that hence the link
20563 register will be pushed onto the stack) we cannot go back on it. */
20564 if (cfun->machine->far_jump_used)
20567 /* If this function is not being called from the prologue/epilogue
20568 generation code then it must be being called from the
20569 INITIAL_ELIMINATION_OFFSET macro. */
20570 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20572 /* In this case we know that we are being asked about the elimination
20573 of the arg pointer register. If that register is not being used,
20574 then there are no arguments on the stack, and we do not have to
20575 worry that a far jump might force the prologue to push the link
20576 register, changing the stack offsets. In this case we can just
20577 return false, since the presence of far jumps in the function will
20578 not affect stack offsets.
20580 If the arg pointer is live (or if it was live, but has now been
20581 eliminated and so set to dead) then we do have to test to see if
20582 the function might contain a far jump. This test can lead to some
20583 false negatives, since before reload is completed, the length of
20584 branch instructions is not known, so gcc defaults to returning their
20585 longest length, which in turn sets the far jump attribute to true.
20587 A false negative will not result in bad code being generated, but it
20588 will result in a needless push and pop of the link register. We
20589 hope that this does not occur too often.
20591 If we need doubleword stack alignment this could affect the other
20592 elimination offsets so we can't risk getting it wrong. */
20593 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20594 cfun->machine->arg_pointer_live = 1;
20595 else if (!cfun->machine->arg_pointer_live)
20599 /* Check to see if the function contains a branch
20600 insn with the far jump attribute set. */
20601 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20603 if (GET_CODE (insn) == JUMP_INSN
20604 /* Ignore tablejump patterns. */
20605 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20606 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20607 && get_attr_far_jump (insn) == FAR_JUMP_YES
20610 /* Record the fact that we have decided that
20611 the function does use far jumps. */
20612 cfun->machine->far_jump_used = 1;
20620 /* Return nonzero if FUNC must be entered in ARM mode. */
20622 is_called_in_ARM_mode (tree func)
20624 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20626 /* Ignore the problem of functions whose address is taken. */
20627 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20631 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20637 /* Given the stack offsets and register mask in OFFSETS, decide how
20638 many additional registers to push instead of subtracting a constant
20639 from SP. For epilogues the principle is the same except we use pop.
20640 FOR_PROLOGUE indicates which we're generating. */
20642 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20644 HOST_WIDE_INT amount;
20645 unsigned long live_regs_mask = offsets->saved_regs_mask;
20646 /* Extract a mask of the ones we can give to the Thumb's push/pop
20647 instruction. */
20648 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20649 /* Then count how many other high registers will need to be pushed. */
20650 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20651 int n_free, reg_base;
20653 if (!for_prologue && frame_pointer_needed)
20654 amount = offsets->locals_base - offsets->saved_regs;
20656 amount = offsets->outgoing_args - offsets->saved_regs;
20658 /* If the stack frame size is 512 exactly, we can save one load
20659 instruction, which should make this a win even when optimizing
20660 for speed. */
20661 if (!optimize_size && amount != 512)
20664 /* Can't do this if there are high registers to push. */
20665 if (high_regs_pushed != 0)
20668 /* Shouldn't do it in the prologue if no registers would normally
20669 be pushed at all. In the epilogue, also allow it if we'll have
20670 a pop insn for the PC. */
20673 || TARGET_BACKTRACE
20674 || (live_regs_mask & 1 << LR_REGNUM) == 0
20675 || TARGET_INTERWORK
20676 || crtl->args.pretend_args_size != 0))
20679 /* Don't do this if thumb_expand_prologue wants to emit instructions
20680 between the push and the stack frame allocation. */
20682 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20683 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20690 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20691 live_regs_mask >>= reg_base;
20694 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20695 && (for_prologue || call_used_regs[reg_base + n_free]))
20697 live_regs_mask >>= 1;
20703 gcc_assert (amount / 4 * 4 == amount);
20705 if (amount >= 512 && (amount - n_free * 4) < 512)
20706 return (amount - 508) / 4;
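/* Worked example, when optimizing for size: with a 516-byte frame and
   two free low registers, 516 - 2*4 = 508 < 512, so we return
   (516 - 508) / 4 = 2; pushing two extra registers leaves a 508-byte
   adjustment, which fits the 508-byte limit of a single Thumb-1
   "sub sp" immediate. */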
20707 if (amount <= n_free * 4)
20712 /* The bits which aren't usefully expanded as rtl. */
20714 thumb_unexpanded_epilogue (void)
20716 arm_stack_offsets *offsets;
20718 unsigned long live_regs_mask = 0;
20719 int high_regs_pushed = 0;
20721 int had_to_push_lr;
20724 if (cfun->machine->return_used_this_function != 0)
20727 if (IS_NAKED (arm_current_func_type ()))
20730 offsets = arm_get_frame_offsets ();
20731 live_regs_mask = offsets->saved_regs_mask;
20732 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20734 /* Deduce the registers used from the function's return value.
20735 This is more reliable than examining df_regs_ever_live_p () because that
20736 will be set if the register is ever used in the function, not just if
20737 the register is used to hold a return value. */
20738 size = arm_size_return_regs ();
20740 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20743 unsigned long extra_mask = (1 << extra_pop) - 1;
20744 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20747 /* The prolog may have pushed some high registers to use as
20748 work registers. e.g. the testsuite file:
20749 gcc/testsuite/gcc.c-torture/execute/complex-2.c
20750 compiles to produce:
20751 push {r4, r5, r6, r7, lr}
20755 as part of the prolog. We have to undo that pushing here. */
20757 if (high_regs_pushed)
20759 unsigned long mask = live_regs_mask & 0xff;
20762 /* The available low registers depend on the size of the value we are
20763 returning. */
20770 /* Oh dear! We have no low registers into which we can pop
20771 high registers! */
20772 internal_error
20773 ("no low registers available for popping high registers");
20775 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20776 if (live_regs_mask & (1 << next_hi_reg))
20779 while (high_regs_pushed)
20781 /* Find lo register(s) into which the high register(s) can
20783 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20785 if (mask & (1 << regno))
20786 high_regs_pushed--;
20787 if (high_regs_pushed == 0)
20791 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20793 /* Pop the values into the low register(s). */
20794 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20796 /* Move the value(s) into the high registers. */
20797 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20799 if (mask & (1 << regno))
20801 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20804 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20805 if (live_regs_mask & (1 << next_hi_reg))
20810 live_regs_mask &= ~0x0f00;
20813 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20814 live_regs_mask &= 0xff;
20816 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20818 /* Pop the return address into the PC. */
20819 if (had_to_push_lr)
20820 live_regs_mask |= 1 << PC_REGNUM;
20822 /* Either no argument registers were pushed or a backtrace
20823 structure was created which includes an adjusted stack
20824 pointer, so just pop everything. */
20825 if (live_regs_mask)
20826 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20829 /* We have either just popped the return address into the
20830 PC or it was kept in LR for the entire function.
20831 Note that thumb_pushpop has already called thumb_exit if the
20832 PC was in the list. */
20833 if (!had_to_push_lr)
20834 thumb_exit (asm_out_file, LR_REGNUM);
20838 /* Pop everything but the return address. */
20839 if (live_regs_mask)
20840 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20843 if (had_to_push_lr)
20847 /* We have no free low regs, so save one. */
20848 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20852 /* Get the return address into a temporary register. */
20853 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20854 1 << LAST_ARG_REGNUM);
20858 /* Move the return address to lr. */
20859 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20861 /* Restore the low register. */
20862 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20867 regno = LAST_ARG_REGNUM;
20872 /* Remove the argument registers that were pushed onto the stack. */
20873 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20874 SP_REGNUM, SP_REGNUM,
20875 crtl->args.pretend_args_size);
20877 thumb_exit (asm_out_file, regno);
20883 /* Functions to save and restore machine-specific function data. */
20884 static struct machine_function *
20885 arm_init_machine_status (void)
20887 struct machine_function *machine;
20888 machine = ggc_alloc_cleared_machine_function ();
20890 #if ARM_FT_UNKNOWN != 0
20891 machine->func_type = ARM_FT_UNKNOWN;
20896 /* Return an RTX indicating where the return address to the
20897 calling function can be found. */
20899 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20904 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20907 /* Do anything needed before RTL is emitted for each function. */
20909 arm_init_expanders (void)
20911 /* Arrange to initialize and mark the machine per-function status. */
20912 init_machine_status = arm_init_machine_status;
20914 /* This is to stop the combine pass optimizing away the alignment
20915 adjustment of va_arg. */
20916 /* ??? It is claimed that this should not be necessary. */
20918 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20922 /* Like arm_compute_initial_elimination_offset. Simpler because there
20923 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20924 to point at the base of the local variables after static stack
20925 space for a function has been allocated. */
20928 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20930 arm_stack_offsets *offsets;
20932 offsets = arm_get_frame_offsets ();
20936 case ARG_POINTER_REGNUM:
20939 case STACK_POINTER_REGNUM:
20940 return offsets->outgoing_args - offsets->saved_args;
20942 case FRAME_POINTER_REGNUM:
20943 return offsets->soft_frame - offsets->saved_args;
20945 case ARM_HARD_FRAME_POINTER_REGNUM:
20946 return offsets->saved_regs - offsets->saved_args;
20948 case THUMB_HARD_FRAME_POINTER_REGNUM:
20949 return offsets->locals_base - offsets->saved_args;
20952 gcc_unreachable ();
20956 case FRAME_POINTER_REGNUM:
20959 case STACK_POINTER_REGNUM:
20960 return offsets->outgoing_args - offsets->soft_frame;
20962 case ARM_HARD_FRAME_POINTER_REGNUM:
20963 return offsets->saved_regs - offsets->soft_frame;
20965 case THUMB_HARD_FRAME_POINTER_REGNUM:
20966 return offsets->locals_base - offsets->soft_frame;
20969 gcc_unreachable ();
20974 gcc_unreachable ();
20978 /* Generate the rest of a function's prologue. */
20980 thumb1_expand_prologue (void)
20984 HOST_WIDE_INT amount;
20985 arm_stack_offsets *offsets;
20986 unsigned long func_type;
20988 unsigned long live_regs_mask;
20990 func_type = arm_current_func_type ();
20992 /* Naked functions don't have prologues. */
20993 if (IS_NAKED (func_type))
20996 if (IS_INTERRUPT (func_type))
20998 error ("interrupt service routines cannot be coded in Thumb mode");
21002 offsets = arm_get_frame_offsets ();
21003 live_regs_mask = offsets->saved_regs_mask;
21004 /* Load the pic register before setting the frame pointer,
21005 so we can use r7 as a temporary work register. */
21006 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21007 arm_load_pic_register (live_regs_mask);
21009 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21010 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
21011 stack_pointer_rtx);
21013 if (flag_stack_usage)
21014 current_function_static_stack_size
21015 = offsets->outgoing_args - offsets->saved_args;
21017 amount = offsets->outgoing_args - offsets->saved_regs;
21018 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
21023 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21024 GEN_INT (- amount)));
21025 RTX_FRAME_RELATED_P (insn) = 1;
21031 /* The stack decrement is too big for an immediate value in a single
21032 insn. In theory we could issue multiple subtracts, but after
21033 three of them it becomes more space efficient to place the full
21034 value in the constant pool and load into a register. (Also the
21035 ARM debugger really likes to see only one stack decrement per
21036 function). So instead we look for a scratch register into which
21037 we can load the decrement, and then we subtract this from the
21038 stack pointer. Unfortunately on the thumb the only available
21039 scratch registers are the argument registers, and we cannot use
21040 these as they may hold arguments to the function. Instead we
21041 attempt to locate a call preserved register which is used by this
21042 function. If we can find one, then we know that it will have
21043 been pushed at the start of the prologue and so we can corrupt
21044 it now. */
21045 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
21046 if (live_regs_mask & (1 << regno))
21049 gcc_assert (regno <= LAST_LO_REGNUM);
21051 reg = gen_rtx_REG (SImode, regno);
21053 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
21055 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21056 stack_pointer_rtx, reg));
21057 RTX_FRAME_RELATED_P (insn) = 1;
21058 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21059 plus_constant (stack_pointer_rtx,
21061 RTX_FRAME_RELATED_P (dwarf) = 1;
21062 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21066 if (frame_pointer_needed)
21067 thumb_set_frame_pointer (offsets);
21069 /* If we are profiling, make sure no instructions are scheduled before
21070 the call to mcount. Similarly if the user has requested no
21071 scheduling in the prolog. Similarly if we want non-call exceptions
21072 using the EABI unwinder, to prevent faulting instructions from being
21073 swapped with a stack adjustment. */
21074 if (crtl->profile || !TARGET_SCHED_PROLOG
21075 || (arm_except_unwind_info (&global_options) == UI_TARGET
21076 && cfun->can_throw_non_call_exceptions))
21077 emit_insn (gen_blockage ());
21079 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
21080 if (live_regs_mask & 0xff)
21081 cfun->machine->lr_save_eliminated = 0;
21086 thumb1_expand_epilogue (void)
21088 HOST_WIDE_INT amount;
21089 arm_stack_offsets *offsets;
21092 /* Naked functions don't have epilogues. */
21093 if (IS_NAKED (arm_current_func_type ()))
21096 offsets = arm_get_frame_offsets ();
21097 amount = offsets->outgoing_args - offsets->saved_regs;
21099 if (frame_pointer_needed)
21101 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
21102 amount = offsets->locals_base - offsets->saved_regs;
21104 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
21106 gcc_assert (amount >= 0);
21110 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21111 GEN_INT (amount)));
21114 /* r3 is always free in the epilogue. */
21115 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
21117 emit_insn (gen_movsi (reg, GEN_INT (amount)));
21118 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
21122 /* Emit a USE (stack_pointer_rtx), so that
21123 the stack adjustment will not be deleted. */
21124 emit_insn (gen_prologue_use (stack_pointer_rtx));
21126 if (crtl->profile || !TARGET_SCHED_PROLOG)
21127 emit_insn (gen_blockage ());
21129 /* Emit a clobber for each insn that will be restored in the epilogue,
21130 so that flow2 will get register lifetimes correct. */
21131 for (regno = 0; regno < 13; regno++)
21132 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
21133 emit_clobber (gen_rtx_REG (SImode, regno));
21135 if (! df_regs_ever_live_p (LR_REGNUM))
21136 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
21140 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
21142 arm_stack_offsets *offsets;
21143 unsigned long live_regs_mask = 0;
21144 unsigned long l_mask;
21145 unsigned high_regs_pushed = 0;
21146 int cfa_offset = 0;
21149 if (IS_NAKED (arm_current_func_type ()))
21152 if (is_called_in_ARM_mode (current_function_decl))
21156 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
21157 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
21159 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
21161 /* Generate code sequence to switch us into Thumb mode. */
21162 /* The .code 32 directive has already been emitted by
21163 ASM_DECLARE_FUNCTION_NAME. */
21164 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21165 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21167 /* Generate a label, so that the debugger will notice the
21168 change in instruction sets. This label is also used by
21169 the assembler to bypass the ARM code when this function
21170 is called from a Thumb encoded function elsewhere in the
21171 same file. Hence the definition of STUB_NAME here must
21172 agree with the definition in gas/config/tc-arm.c. */
21174 #define STUB_NAME ".real_start_of"
21176 fprintf (f, "\t.code\t16\n");
21178 if (arm_dllexport_name_p (name))
21179 name = arm_strip_name_encoding (name);
21181 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21182 fprintf (f, "\t.thumb_func\n");
21183 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
21186 if (crtl->args.pretend_args_size)
21188 /* Output unwind directive for the stack adjustment. */
21189 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21190 fprintf (f, "\t.pad #%d\n",
21191 crtl->args.pretend_args_size);
21193 if (cfun->machine->uses_anonymous_args)
21197 fprintf (f, "\tpush\t{");
21199 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21201 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
21202 regno <= LAST_ARG_REGNUM;
21204 asm_fprintf (f, "%r%s", regno,
21205 regno == LAST_ARG_REGNUM ? "" : ", ");
21207 fprintf (f, "}\n");
21210 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
21211 SP_REGNUM, SP_REGNUM,
21212 crtl->args.pretend_args_size);
21214 /* We don't need to record the stores for unwinding (would it
21215 help the debugger any if we did?), but record the change in
21216 the stack pointer. */
21217 if (dwarf2out_do_frame ())
21219 char *l = dwarf2out_cfi_label (false);
21221 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21222 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21226 /* Get the registers we are going to push. */
21227 offsets = arm_get_frame_offsets ();
21228 live_regs_mask = offsets->saved_regs_mask;
21229 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21230 l_mask = live_regs_mask & 0x40ff;
21231 /* Then count how many other high registers will need to be pushed. */
21232 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21234 if (TARGET_BACKTRACE)
21237 unsigned work_register;
21239 /* We have been asked to create a stack backtrace structure.
21240 The code looks like this:
21244 0 sub SP, #16 Reserve space for 4 registers.
21245 2 push {R7} Push low registers.
21246 4 add R7, SP, #20 Get the stack pointer before the push.
21247 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21248 8 mov R7, PC Get hold of the start of this code plus 12.
21249 10 str R7, [SP, #16] Store it.
21250 12 mov R7, FP Get hold of the current frame pointer.
21251 14 str R7, [SP, #4] Store it.
21252 16 mov R7, LR Get hold of the current return address.
21253 18 str R7, [SP, #12] Store it.
21254 20 add R7, SP, #16 Point at the start of the backtrace structure.
21255 22 mov FP, R7 Put this value into the frame pointer. */
21257 work_register = thumb_find_work_register (live_regs_mask);
21259 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21260 asm_fprintf (f, "\t.pad #16\n");
21263 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21264 SP_REGNUM, SP_REGNUM);
21266 if (dwarf2out_do_frame ())
21268 char *l = dwarf2out_cfi_label (false);
21270 cfa_offset = cfa_offset + 16;
21271 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21276 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21277 offset = bit_count (l_mask) * UNITS_PER_WORD;
21282 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21283 offset + 16 + crtl->args.pretend_args_size);
21285 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21288 /* Make sure that the instruction fetching the PC is in the right place
21289 to calculate "start of backtrace creation code + 12". */
21292 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21293 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21295 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21296 ARM_HARD_FRAME_POINTER_REGNUM);
21297 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21302 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21303 ARM_HARD_FRAME_POINTER_REGNUM);
21304 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21306 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21307 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21311 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21312 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21314 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21316 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21317 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21319 /* Optimization: If we are not pushing any low registers but we are going
21320 to push some high registers then delay our first push. This will just
21321 be a push of LR and we can combine it with the push of the first high
21322 register. */
21323 else if ((l_mask & 0xff) != 0
21324 || (high_regs_pushed == 0 && l_mask))
21326 unsigned long mask = l_mask;
21327 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21328 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21331 if (high_regs_pushed)
21333 unsigned pushable_regs;
21334 unsigned next_hi_reg;
21336 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21337 if (live_regs_mask & (1 << next_hi_reg))
21340 pushable_regs = l_mask & 0xff;
21342 if (pushable_regs == 0)
21343 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21345 while (high_regs_pushed > 0)
21347 unsigned long real_regs_mask = 0;
21349 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21351 if (pushable_regs & (1 << regno))
21353 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21355 high_regs_pushed --;
21356 real_regs_mask |= (1 << next_hi_reg);
21358 if (high_regs_pushed)
21360 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21362 if (live_regs_mask & (1 << next_hi_reg))
21367 pushable_regs &= ~((1 << regno) - 1);
21373 /* If we had to find a work register and we have not yet
21374 saved the LR then add it to the list of regs to push. */
21375 if (l_mask == (1 << LR_REGNUM))
21377 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21379 real_regs_mask | (1 << LR_REGNUM));
21383 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
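/* For instance, with r8 and r9 live and r4/r5 usable as work
   registers, one round of this loop emits

	mov	r5, r9
	mov	r4, r8
	push	{r4, r5}

   while REAL_REGS_MASK records r8 and r9, so the unwind information
   describes the slots by the registers actually saved. */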
21388 /* Handle the case of a double word load into a low register from
21389 a computed memory address. The computed address may involve a
21390 register which is overwritten by the load. */
21392 thumb_load_double_from_address (rtx *operands)
21400 gcc_assert (GET_CODE (operands[0]) == REG);
21401 gcc_assert (GET_CODE (operands[1]) == MEM);
21403 /* Get the memory address. */
21404 addr = XEXP (operands[1], 0);
21406 /* Work out how the memory address is computed. */
21407 switch (GET_CODE (addr))
21410 operands[2] = adjust_address (operands[1], SImode, 4);
21412 if (REGNO (operands[0]) == REGNO (addr))
21414 output_asm_insn ("ldr\t%H0, %2", operands);
21415 output_asm_insn ("ldr\t%0, %1", operands);
21419 output_asm_insn ("ldr\t%0, %1", operands);
21420 output_asm_insn ("ldr\t%H0, %2", operands);
21425 /* Compute <address> + 4 for the high order load. */
21426 operands[2] = adjust_address (operands[1], SImode, 4);
21428 output_asm_insn ("ldr\t%0, %1", operands);
21429 output_asm_insn ("ldr\t%H0, %2", operands);
21433 arg1 = XEXP (addr, 0);
21434 arg2 = XEXP (addr, 1);
21436 if (CONSTANT_P (arg1))
21437 base = arg2, offset = arg1;
21439 base = arg1, offset = arg2;
21441 gcc_assert (GET_CODE (base) == REG);
21443 /* Catch the case of <address> = <reg> + <reg> */
21444 if (GET_CODE (offset) == REG)
21446 int reg_offset = REGNO (offset);
21447 int reg_base = REGNO (base);
21448 int reg_dest = REGNO (operands[0]);
21450 /* Add the base and offset registers together into the
21451 higher destination register. */
21452 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21453 reg_dest + 1, reg_base, reg_offset);
21455 /* Load the lower destination register from the address in
21456 the higher destination register. */
21457 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21458 reg_dest, reg_dest + 1);
21460 /* Load the higher destination register from its own address
21462 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21463 reg_dest + 1, reg_dest + 1);
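/* So a doubleword load r2:r3 = *(r0 + r1) comes out as

	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   the sum lives in the high half of the destination, which is loaded
   last, so nothing is clobbered too early. */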
21467 /* Compute <address> + 4 for the high order load. */
21468 operands[2] = adjust_address (operands[1], SImode, 4);
21470 /* If the computed address is held in the low order register
21471 then load the high order register first, otherwise always
21472 load the low order register first. */
21473 if (REGNO (operands[0]) == REGNO (base))
21475 output_asm_insn ("ldr\t%H0, %2", operands);
21476 output_asm_insn ("ldr\t%0, %1", operands);
21480 output_asm_insn ("ldr\t%0, %1", operands);
21481 output_asm_insn ("ldr\t%H0, %2", operands);
21487 /* With no registers to worry about we can just load the value
21488 directly. */
21489 operands[2] = adjust_address (operands[1], SImode, 4);
21491 output_asm_insn ("ldr\t%H0, %2", operands);
21492 output_asm_insn ("ldr\t%0, %1", operands);
21496 gcc_unreachable ();
21503 thumb_output_move_mem_multiple (int n, rtx *operands)
21510 if (REGNO (operands[4]) > REGNO (operands[5]))
21513 operands[4] = operands[5];
21516 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21517 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21521 if (REGNO (operands[4]) > REGNO (operands[5]))
21524 operands[4] = operands[5];
21527 if (REGNO (operands[5]) > REGNO (operands[6]))
21530 operands[5] = operands[6];
21533 if (REGNO (operands[4]) > REGNO (operands[5]))
21536 operands[4] = operands[5];
21540 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21541 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
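/* E.g. a 12-byte block copy with scratch registers r4-r6 comes out as

	ldmia	r1!, {r4, r5, r6}
	stmia	r0!, {r4, r5, r6}

   and the writeback leaves both pointers just past the copied words,
   ready for any following partial-word copy. */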
21545 gcc_unreachable ();
21551 /* Output a call-via instruction for thumb state. */
21553 thumb_call_via_reg (rtx reg)
21555 int regno = REGNO (reg);
21558 gcc_assert (regno < LR_REGNUM);
21560 /* If we are in the normal text section we can use a single instance
21561 per compilation unit. If we are doing function sections, then we need
21562 an entry per section, since we can't rely on reachability. */
21563 if (in_section == text_section)
21565 thumb_call_reg_needed = 1;
21567 if (thumb_call_via_label[regno] == NULL)
21568 thumb_call_via_label[regno] = gen_label_rtx ();
21569 labelp = thumb_call_via_label + regno;
21573 if (cfun->machine->call_via[regno] == NULL)
21574 cfun->machine->call_via[regno] = gen_label_rtx ();
21575 labelp = cfun->machine->call_via + regno;
21578 output_asm_insn ("bl\t%a0", labelp);
21582 /* Routines for generating rtl. */
21584 thumb_expand_movmemqi (rtx *operands)
21586 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21587 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21588 HOST_WIDE_INT len = INTVAL (operands[2]);
21589 HOST_WIDE_INT offset = 0;
21593 emit_insn (gen_movmem12b (out, in, out, in));
21599 emit_insn (gen_movmem8b (out, in, out, in));
21605 rtx reg = gen_reg_rtx (SImode);
21606 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21607 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21614 rtx reg = gen_reg_rtx (HImode);
21615 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21616 plus_constant (in, offset))));
21617 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21618 reg));
21625 rtx reg = gen_reg_rtx (QImode);
21626 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21627 plus_constant (in, offset))));
21628 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21629 reg));
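/* So a 15-byte copy, for example, becomes one 12-byte ldmia/stmia
   group, one halfword move and one byte move, with OFFSET addressing
   the tail that the auto-incremented pointers no longer cover. */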
21634 thumb_reload_out_hi (rtx *operands)
21636 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21639 /* Handle reading a half-word from memory during reload. */
21641 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21643 gcc_unreachable ();
21646 /* Return the length of a function name prefix
21647 that starts with the character 'c'. */
21649 arm_get_strip_length (int c)
21653 ARM_NAME_ENCODING_LENGTHS
21658 /* Return a pointer to a function's name with any
21659 and all prefix encodings stripped from it. */
21661 arm_strip_name_encoding (const char *name)
21665 while ((skip = arm_get_strip_length (* name)))
21671 /* If there is a '*' anywhere in the name's prefix, then
21672 emit the stripped name verbatim, otherwise prepend an
21673 underscore if leading underscores are being used. */
21675 arm_asm_output_labelref (FILE *stream, const char *name)
21680 while ((skip = arm_get_strip_length (* name)))
21682 verbatim |= (*name == '*');
21687 fputs (name, stream);
21689 asm_fprintf (stream, "%U%s", name);
21693 arm_file_start (void)
21697 if (TARGET_UNIFIED_ASM)
21698 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21702 const char *fpu_name;
21703 if (arm_selected_arch)
21704 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21706 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21708 if (TARGET_SOFT_FLOAT)
21711 fpu_name = "softvfp";
21713 fpu_name = "softfpa";
21717 fpu_name = arm_fpu_desc->name;
21718 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21720 if (TARGET_HARD_FLOAT)
21721 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21722 if (TARGET_HARD_FLOAT_ABI)
21723 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21726 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21728 /* Some of these attributes only apply when the corresponding features
21729 are used. However we don't have any easy way of figuring this out.
21730 Conservatively record the setting that would have been used. */
21732 /* Tag_ABI_FP_rounding. */
21733 if (flag_rounding_math)
21734 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21735 if (!flag_unsafe_math_optimizations)
21737 /* Tag_ABI_FP_denormal. */
21738 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21739 /* Tag_ABI_FP_exceptions. */
21740 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21742 /* Tag_ABI_FP_user_exceptions. */
21743 if (flag_signaling_nans)
21744 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21745 /* Tag_ABI_FP_number_model. */
21746 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21747 flag_finite_math_only ? 1 : 3);
21749 /* Tag_ABI_align8_needed. */
21750 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21751 /* Tag_ABI_align8_preserved. */
21752 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21753 /* Tag_ABI_enum_size. */
21754 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21755 flag_short_enums ? 1 : 2);
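/* E.g. -fshort-enums gives ".eabi_attribute 26, 1" while the default
   int-sized enums give ".eabi_attribute 26, 2", which lets the linker
   diagnose objects built with incompatible enum sizes. */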
21757 /* Tag_ABI_optimization_goals. */
21760 else if (optimize >= 2)
21766 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21768 /* Tag_ABI_FP_16bit_format. */
21769 if (arm_fp16_format)
21770 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21771 (int)arm_fp16_format);
  if (arm_lang_output_object_attributes_hook)
    arm_lang_output_object_attributes_hook();
  default_file_start();
}

static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                     HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        {
          fputs ("\tldr\tr12, ", file);
        }
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
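          /* Worked example with illustrative addresses: if the target
             function is at 0x8000 and .LTHUNKPCn is at 0x7f00, the
             literal word holds 0x8000 - 7 - 0x7f00 = 0xf9.  The add
             reads the pc as 0x7f00 + 8, so r12 = 0xf9 + 0x7f08 =
             0x8001, the target address with the Thumb bit set.  */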
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%s\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
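
/* Layout sketch (illustrative, not from the original sources): 1.0 in
   IEEE half precision is 0x3c00, so with little-endian word order the
   pool entry is the two value bytes followed by two bytes of padding;
   with big-endian word order the padding comes first.  The padding
   keeps the entry a full word so that ldr can load it directly.  */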
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
                            enum machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
        nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 1);

  return reg_overlap_mentioned_p (value, addr);
}
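
/* Illustrative RTL, with hypothetical register numbers: given a
   producer (set (reg r1) (...)) and a consumer load
   (set (reg r0) (mem (plus (reg r1) (const_int 4)))), the code above
   reduces VALUE to (reg r1) and ADDR to the (mem ...) expression;
   r1 is mentioned in the address, so an early dependency is
   reported.  */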
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
        return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
        return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
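
/* Illustrative pairing, with hypothetical register numbers: after
   "mul r1, r2, r3", the consumer "mla r4, r5, r6, r1" qualifies
   because r1 feeds only the accumulator operand; "mla r4, r1, r6, r1"
   does not, since r1 is also one of the multiplication operands.  */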
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (addr, delta);
        }
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
      case DImode:
        if (TARGET_NEON_VECTORIZE_QUAD)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
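
/* Consequence of the mask, for illustration: in SImode a variable
   shift count of 257 behaves exactly like a shift by 1, so the
   middle-end may omit an explicit "and" with 255 before the shift.
   The DImode mask of 0 promises nothing, so no such truncation is
   performed for 64-bit shifts.  */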
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
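
/* Example span (illustrative): a DFmode value in d16, the first
   register outside the legacy S-register overlap, is described as the
   single DImode DWARF register 256 + 16 = 272; a 16-byte value
   starting there would span registers 272 and 273.  */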
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
        {
          fprintf (asm_out_file, "\t.pad #4\n");
          offset -= 4;
        }
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
          || GET_CODE (XEXP (e, 0)) != MEM
          || GET_CODE (XEXP (e, 1)) != REG)
        abort ();

      reg = REGNO (XEXP (e, 1));

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
        {
          offset += reg_size;
          if (GET_CODE (XEXP (e, 0)) != REG
              || REGNO (XEXP (e, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e, 1)) != CONST_INT
              || offset != INTVAL (XEXP (e, 1)))
            abort ();
        }
      else if (i != 1
               || GET_CODE (e) != REG
               || REGNO (e) != SP_REGNUM)
        abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf (asm_out_file, "{d%d}\n",
                     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
        {
          /* Stack pointer save before alignment.  */
          reg = REGNO (e0);
          asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                       reg + 0x90, reg);
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_EXCEPT_UNWIND_INFO.  */

static enum unwind_info_type
arm_except_unwind_info (struct gcc_options *opts)
{
  /* Honor the --enable-sjlj-exceptions configure switch.  */
#ifdef CONFIG_SJLJ_EXCEPTIONS
  if (CONFIG_SJLJ_EXCEPTIONS)
    return UI_SJLJ;
#endif

  /* If not using ARM EABI unwind tables... */
  if (ARM_UNWIND_INFO)
    {
      /* For simplicity elsewhere in this file, indicate that all unwind
         info is disabled if we're not emitting unwind tables.  */
      if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
        return UI_NONE;
      else
        return UI_TARGET;
    }

  /* ... we use sjlj exceptions for backwards compatibility.  */
  return UI_SJLJ;
}
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT (val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
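
/* Output sketch with illustrative operands: under unified syntax a
   constant left shift with SET_FLAGS == 1 comes out as
   "lsls r0, r1, #2", and with SET_FLAGS == 0 as "lsl r0, r1, #2";
   the divided-syntax fallback instead emits "mov r0, r1, lsl #2",
   folding the shift into the mov via the %S3 operand modifier.  */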
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa5:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

/* Legitimize a memory reference for sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these
   instructions.  */
static rtx
arm_legitimize_sync_memory (rtx memory)
{
  rtx addr = force_reg (Pmode, XEXP (memory, 0));
  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);

  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
  return legitimate_memory;
}
/* An instruction emitter.  */
typedef void (* emit_f) (int label, const char *, rtx *);

/* An instruction emitter that emits via the conventional
   output_asm_insn.  */
static void
arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
{
  output_asm_insn (pattern, operands);
}

/* Count the number of emitted synchronization instructions.  */
static unsigned arm_insn_count;

/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
static void
arm_count (int label,
           const char *pattern ATTRIBUTE_UNUSED,
           rtx *operands ATTRIBUTE_UNUSED)
{
  if (! label)
    ++ arm_insn_count;
}

/* Construct a pattern using conventional output formatting and feed
   it to output_asm_insn.  Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
static void ATTRIBUTE_PRINTF_4
arm_output_asm_insn (emit_f emit, int label, rtx *operands,
                     const char *pattern, ...)
{
  char buffer[256];
  va_list ap;

  va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
}
/* Emit the memory barrier instruction, if any, provided by this
   target to a specified emitter.  */
static void
arm_process_output_memory_barrier (emit_f emit, rtx *operands)
{
  if (TARGET_HAVE_DMB)
    {
      /* Note we issue a system level barrier.  We should consider
         issuing an inner shareability zone barrier here instead, ie.
         "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }

  if (TARGET_HAVE_DMB_MCR)
    {
      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
      return;
    }

  gcc_unreachable ();
}

/* Emit the memory barrier instruction, if any, provided by this
   target.  */
const char *
arm_output_memory_barrier (rtx *operands)
{
  arm_process_output_memory_barrier (arm_emit, operands);
  return "";
}

/* Helper to figure out the instruction suffix required on ldrex/strex
   for operations on an object of the specified mode.  */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default:
      gcc_unreachable ();
    }
  return "";
}
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_ldrex (emit_f emit,
                  enum machine_mode mode,
                  rtx target,
                  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[2];

  operands[0] = target;
  operands[1] = memory;
  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
}

/* Emit a strex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_strex (emit_f emit,
                  enum machine_mode mode,
                  const char *cc,
                  rtx result,
                  rtx value,
                  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[3];

  operands[0] = result;
  operands[1] = value;
  operands[2] = memory;
  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
                       cc);
}

/* Helper to emit a two operand instruction.  */
static void
arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
{
  rtx operands[2];

  operands[0] = d;
  operands[1] = s;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
}

/* Helper to emit a three operand instruction.  */
static void
arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
{
  rtx operands[3];

  operands[0] = d;
  operands[1] = a;
  operands[2] = b;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
}
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:

   RTX register or const_int representing the required old_value for
   the modify to continue, if NULL no comparison is performed.  */
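
/* For a SImode add with a full barrier, the loop below typically
   expands to something like this (register choices illustrative):

	dmb	sy
   1:	ldrex	r0, [r1]
	add	r2, r0, r3
	strex	r4, r2, [r1]
	teq	r4, #0
	bne	1b
	dmb	sy

   strex writes 0 to its status register on success and 1 if the
   exclusive monitor was lost, which is what the teq/bne retry
   tests.  */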
static void
arm_output_sync_loop (emit_f emit,
                      enum machine_mode mode,
                      rtx old_value,
                      rtx memory,
                      rtx required_value,
                      rtx new_value,
                      rtx t1,
                      rtx t2,
                      enum attr_sync_op sync_op,
                      int early_barrier_required)
{
  rtx operands[2];

  gcc_assert (t1 != t2);

  if (early_barrier_required)
    arm_process_output_memory_barrier (emit, NULL);

  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);

  arm_output_ldrex (emit, mode, old_value, memory);

  if (required_value)
    {
      operands[0] = old_value;
      operands[1] = required_value;
      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=",
                           LOCAL_LABEL_PREFIX);
    }

  switch (sync_op)
    {
    case SYNC_OP_ADD:
      arm_output_op3 (emit, "add", t1, old_value, new_value);
      break;

    case SYNC_OP_SUB:
      arm_output_op3 (emit, "sub", t1, old_value, new_value);
      break;

    case SYNC_OP_IOR:
      arm_output_op3 (emit, "orr", t1, old_value, new_value);
      break;

    case SYNC_OP_XOR:
      arm_output_op3 (emit, "eor", t1, old_value, new_value);
      break;

    case SYNC_OP_AND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      break;

    case SYNC_OP_NAND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      arm_output_op2 (emit, "mvn", t1, t1);
      break;

    case SYNC_OP_NONE:
      t1 = new_value;
      break;
    }

  if (t2)
    {
      arm_output_strex (emit, mode, "", t2, t1, memory);
      operands[0] = t2;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
                           LOCAL_LABEL_PREFIX);
    }
  else
    {
      /* Use old_value for the return value because for some operations
         the old_value can easily be restored.  This saves one register.  */
      arm_output_strex (emit, mode, "", old_value, t1, memory);
      operands[0] = old_value;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
                           LOCAL_LABEL_PREFIX);

      switch (sync_op)
        {
        case SYNC_OP_ADD:
          arm_output_op3 (emit, "sub", old_value, t1, new_value);
          break;

        case SYNC_OP_SUB:
          arm_output_op3 (emit, "add", old_value, t1, new_value);
          break;

        case SYNC_OP_XOR:
          arm_output_op3 (emit, "eor", old_value, t1, new_value);
          break;

        case SYNC_OP_NONE:
          arm_output_op2 (emit, "mov", old_value, required_value);
          break;

        default:
          gcc_unreachable ();
        }
    }

  arm_process_output_memory_barrier (emit, NULL);
  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
}
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);

/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND (result, 0);
  memory = FETCH_SYNC_OPERAND (memory, 0);
  required_value = FETCH_SYNC_OPERAND (required_value, 0);
  new_value = FETCH_SYNC_OPERAND (new_value, 0);
  t1 = FETCH_SYNC_OPERAND (t1, 0);
  t2 = FETCH_SYNC_OPERAND (t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
                        new_value, t1, t2, sync_op, early_barrier);
}

/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}

/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions, it just counts instructions, being careful not
   to count labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
                    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}

/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
                 struct arm_sync_generator *generator,
                 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
        required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
                                     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    {
      emit_insn (arm_call_generator (generator, target, memory, required_value,
                                     new_value));
    }
}
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
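
/* Example of the check above (illustrative): for a V4SF vector the
   element alignment is 4 bytes, so a known misalignment of 8 passes
   (8 % 4 == 0) and can be handled with element-aligned NEON accesses,
   while a misalignment of 2 fails and the caller must fall back to
   the default handling.  */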
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
           regno <= LAST_FPA_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM;
           regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
        {
          for (regno = FIRST_FPA_REGNUM;
               regno <= LAST_FPA_REGNUM; ++ regno)
            fixed_regs[regno] = call_used_regs[regno] = 1;
          for (regno = FIRST_CIRRUS_FP_REGNUM;
               regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
            }
        }
      if (TARGET_VFP)
        {
          /* VFPv3 registers are disabled when earlier VFP
             versions are selected due to the definition of
             LAST_VFP_REGNUM.  */
          for (regno = FIRST_VFP_REGNUM;
               regno <= LAST_VFP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
                || regno >= FIRST_VFP_REGNUM + 32;
            }
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we prefer
     LO_REGS, and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}

/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

#include "gt-arm.h"