/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
					     const unsigned char *sel);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
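/* A quick check of the arithmetic above: the anchored block spans
   offsets -4088 through 4095, i.e. 4088 + 1 + 4095 = 8184 bytes, and
   8184 = 8 * 1023.  */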
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok
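/* Note on the pattern above: every hook is #undef'd first so that this
   file's definition, not the default supplied by target-def.h, is the
   one picked up when TARGET_INITIALIZER assembles the targetm structure
   below.  */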
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)	      /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)	      /* Extended multiply.  */
#define FL_MODE26     (1 << 2)	      /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)	      /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)	      /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)	      /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)	      /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)	      /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)	      /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)	      /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)	      /* XScale.  */
#define FL_CIRRUS     (1 << 11)	      /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)	      /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)	      /* Vector Floating Point V2.  */
#define FL_WBUF	      (1 << 14)	      /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)	      /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)	      /* Thumb-2.  */
#define FL_NOTM	      (1 << 17)	      /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)	      /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)	      /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)	      /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)	      /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)	      /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)	      /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
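/* A worked example of the flag algebra above: FL_FOR_ARCH7M is the
   ARMv6T2 set with FL_NOTM stripped (via FL_FOR_ARCH7) plus
   FL_THUMB_DIV, so an M-profile core keeps Thumb-2 but loses the
   ARM-mode-only instructions while gaining hardware divide.  */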
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options
   should be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */
/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
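/* For example, ARM_PREFETCH_BENEFICIAL (4, 32, 32) expands to the three
   prefetch-related tune_params fields (prefetch slots, L1 cache size
   and L1 line size, here matching the Cortex-A9 entry below), while
   ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1, -1 to disable the
   prefetch model.  */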
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  3,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,				/* Sched adjust cost.  */
  1,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0 , NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
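/* For example, emit_set_insn (reg, GEN_INT (42)) emits a single
   (set (reg) (const_int 42)) insn into the stream and returns it.  */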
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
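/* Worked example: for value == 0x16 the loop runs three times
   (0x16 -> 0x14 -> 0x10 -> 0), so bit_count returns 3.  */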
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
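/* For example (assuming the target supports QQmode fixed-point),
   arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers "__gnu_addqq3" as the QQmode addition libcall; a
   NUM_SUFFIX of 0 would omit the trailing digit.  */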
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
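/* For example, a QQmode-to-HQmode conversion preserves both signedness
   and fract-ness, so the "2" suffix applies and the libcall is named
   "__gnu_fractqqhq2"; converting QQmode to SImode does not, giving
   plain "__gnu_fractqqsi".  */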
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     general AAPCS ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" }, { HQmode, "hq" },
	{ UHQmode, "uhq" }, { SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" }, { TQmode, "tq" },
	{ UTQmode, "utq" }, { HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" }, { DAmode, "da" },
	{ UDAmode, "uda" }, { TAmode, "ta" },   { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" }, { HQmode, "hq" },
	{ UHQmode, "uhq" }, { SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" }, { TQmode, "tq" },
	{ UTQmode, "utq" }, { HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" }, { DAmode, "da" },
	{ UDAmode, "uda" }, { TAmode, "ta" },   { UTAmode, "uta" },
	{ QImode, "qi" },   { HImode, "hi" },   { SImode, "si" },
	{ DImode, "di" },   { SFmode, "sf" },   { DFmode, "df" }
      };

    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between -mcpu and -march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the CPU.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has both the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
	     "will be removed in a future release");
1698 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1699 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1700 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1701 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1702 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1703 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1704 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1705 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1706 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1707 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1708 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1709 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1710 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1711 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1713 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1714 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1715 thumb_code = TARGET_ARM == 0;
1716 thumb1_code = TARGET_THUMB1 != 0;
1717 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1718 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1719 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1720 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1721 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1722 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1724 /* If we are not using the default (ARM mode) section anchor offset
1725 ranges, then set the correct ranges now. */
1728 /* Thumb-1 LDR instructions cannot have negative offsets.
1729 Permissible positive offset ranges are 5-bit (for byte loads),
1730 6-bit (for halfword loads), or 7-bit (for word loads).
1731 Empirical results suggest a 7-bit anchor range gives the best
1732 overall code size. */
1733 targetm.min_anchor_offset = 0;
1734 targetm.max_anchor_offset = 127;
1736 else if (TARGET_THUMB2)
1738 /* The minimum is set such that the total size of the block
1739 for a particular anchor is 248 + 1 + 4095 bytes, which is
1740 divisible by eight, ensuring natural spacing of anchors. */
1741 targetm.min_anchor_offset = -248;
1742 targetm.max_anchor_offset = 4095;
1745 /* V5 code we generate is completely interworking capable, so we turn off
1746 TARGET_INTERWORK here to avoid many tests later on. */
1748 /* XXX However, we must pass the right pre-processor defines to CPP
1749 or GLD can get confused. This is a hack. */
1750 if (TARGET_INTERWORK)
1751 arm_cpp_interwork = 1;
1754 target_flags &= ~MASK_INTERWORK;
1756 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1757 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1759 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1760 error ("iwmmxt abi requires an iwmmxt capable cpu");
1762 if (!global_options_set.x_arm_fpu_index)
1764 const char *target_fpu_name;
1767 #ifdef FPUTYPE_DEFAULT
1768 target_fpu_name = FPUTYPE_DEFAULT;
1770 if (arm_arch_cirrus)
1771 target_fpu_name = "maverick";
1773 target_fpu_name = "fpe2";
1776 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1781 arm_fpu_desc = &all_fpus[arm_fpu_index];
1783 switch (arm_fpu_desc->model)
1785 case ARM_FP_MODEL_FPA:
1786 if (arm_fpu_desc->rev == 2)
1787 arm_fpu_attr = FPU_FPE2;
1788 else if (arm_fpu_desc->rev == 3)
1789 arm_fpu_attr = FPU_FPE3;
1791 arm_fpu_attr = FPU_FPA;
1794 case ARM_FP_MODEL_MAVERICK:
1795 arm_fpu_attr = FPU_MAVERICK;
1798 case ARM_FP_MODEL_VFP:
1799 arm_fpu_attr = FPU_VFP;
1806 if (TARGET_AAPCS_BASED
1807 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1808 error ("FPA is unsupported in the AAPCS");
1810 if (TARGET_AAPCS_BASED)
1812 if (TARGET_CALLER_INTERWORKING)
1813 error ("AAPCS does not support -mcaller-super-interworking");
1815 if (TARGET_CALLEE_INTERWORKING)
1816 error ("AAPCS does not support -mcallee-super-interworking");
1819 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1820 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1821 will ever exist. GCC makes no attempt to support this combination. */
1822 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1823 sorry ("iWMMXt and hardware floating point");
1825 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1826 if (TARGET_THUMB2 && TARGET_IWMMXT)
1827 sorry ("Thumb-2 iWMMXt");
1829 /* __fp16 support currently assumes the core has ldrh. */
1830 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1831 sorry ("__fp16 and no ldrh");
1833 /* If soft-float is specified then don't use FPU. */
1834 if (TARGET_SOFT_FLOAT)
1835 arm_fpu_attr = FPU_NONE;
1837 if (TARGET_AAPCS_BASED)
1839 if (arm_abi == ARM_ABI_IWMMXT)
1840 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1841 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1842 && TARGET_HARD_FLOAT
1844 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1846 arm_pcs_default = ARM_PCS_AAPCS;
1850 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1851 sorry ("-mfloat-abi=hard and VFP");
1853 if (arm_abi == ARM_ABI_APCS)
1854 arm_pcs_default = ARM_PCS_APCS;
1856 arm_pcs_default = ARM_PCS_ATPCS;
1859 /* For arm2/3 there is no need to do any scheduling if there is only
1860 a floating point emulator, or we are doing software floating-point. */
1861 if ((TARGET_SOFT_FLOAT
1862 || (TARGET_FPA && arm_fpu_desc->rev))
1863 && (tune_flags & FL_MODE32) == 0)
1864 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1866 /* Use the cp15 method if it is available. */
1867 if (target_thread_pointer == TP_AUTO)
1869 if (arm_arch6k && !TARGET_THUMB1)
1870 target_thread_pointer = TP_CP15;
1872 target_thread_pointer = TP_SOFT;
1875 if (TARGET_HARD_TP && TARGET_THUMB1)
1876 error ("can not use -mtp=cp15 with 16-bit Thumb");
1878 /* Override the default structure alignment for AAPCS ABI. */
1879 if (!global_options_set.x_arm_structure_size_boundary)
1881 if (TARGET_AAPCS_BASED)
1882 arm_structure_size_boundary = 8;
1886 if (arm_structure_size_boundary != 8
1887 && arm_structure_size_boundary != 32
1888 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1890 if (ARM_DOUBLEWORD_ALIGN)
1892 "structure size boundary can only be set to 8, 32 or 64");
1894 warning (0, "structure size boundary can only be set to 8 or 32");
1895 arm_structure_size_boundary
1896 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1900 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1902 error ("RTP PIC is incompatible with Thumb");
1906 /* If stack checking is disabled, we can use r10 as the PIC register,
1907 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1908 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1910 if (TARGET_VXWORKS_RTP)
1911 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1912 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1915 if (flag_pic && TARGET_VXWORKS_RTP)
1916 arm_pic_register = 9;
1918 if (arm_pic_register_string != NULL)
1920 int pic_register = decode_reg_name (arm_pic_register_string);
1923 warning (0, "-mpic-register= is useless without -fpic");
1925 /* Prevent the user from choosing an obviously stupid PIC register. */
1926 else if (pic_register < 0 || call_used_regs[pic_register]
1927 || pic_register == HARD_FRAME_POINTER_REGNUM
1928 || pic_register == STACK_POINTER_REGNUM
1929 || pic_register >= PC_REGNUM
1930 || (TARGET_VXWORKS_RTP
1931 && (unsigned int) pic_register != arm_pic_register))
1932 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1934 arm_pic_register = pic_register;
1937 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1938 if (fix_cm3_ldrd == 2)
1940 if (arm_selected_cpu->core == cortexm3)
1946 /* Enable -munaligned-access by default for
1947 - all ARMv6 architecture-based processors
1948 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1950 Disable -munaligned-access by default for
1951 - all pre-ARMv6 architecture-based processors
1952 - ARMv6-M architecture-based processors. */
1954 if (unaligned_access == 2)
1956 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1957 unaligned_access = 1;
1959 unaligned_access = 0;
1961 else if (unaligned_access == 1
1962 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1964 warning (0, "target CPU does not support unaligned accesses");
1965 unaligned_access = 0;
1968 if (TARGET_THUMB1 && flag_schedule_insns)
1970 /* Don't warn since it's on by default in -O2. */
1971 flag_schedule_insns = 0;
1976 /* If optimizing for size, bump the number of instructions that we
1977 are prepared to conditionally execute (even on a StrongARM). */
1978 max_insns_skipped = 6;
1981 max_insns_skipped = current_tune->max_insns_skipped;
1983 /* Hot/Cold partitioning is not currently supported, since we can't
1984 handle literal pool placement in that case. */
1985 if (flag_reorder_blocks_and_partition)
1987 inform (input_location,
1988 "-freorder-blocks-and-partition not supported on this architecture");
1989 flag_reorder_blocks_and_partition = 0;
1990 flag_reorder_blocks = 1;
1994 /* Hoisting PIC address calculations more aggressively provides a small,
1995 but measurable, size reduction for PIC code. Therefore, we decrease
1996 the bar for unrestricted expression hoisting to the cost of PIC address
1997 calculation, which is 2 instructions. */
1998 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1999 global_options.x_param_values,
2000 global_options_set.x_param_values);
2002 /* ARM EABI defaults to strict volatile bitfields. */
2003 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2004 && abi_version_at_least(2))
2005 flag_strict_volatile_bitfields = 1;
2007 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2008 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2009 if (flag_prefetch_loop_arrays < 0
2012 && current_tune->num_prefetch_slots > 0)
2013 flag_prefetch_loop_arrays = 1;
2015 /* Set up parameters to be used in prefetching algorithm. Do not override the
2016 defaults unless we are tuning for a core we have researched values for. */
2017 if (current_tune->num_prefetch_slots > 0)
2018 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2019 current_tune->num_prefetch_slots,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2022 if (current_tune->l1_cache_line_size >= 0)
2023 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2024 current_tune->l1_cache_line_size,
2025 global_options.x_param_values,
2026 global_options_set.x_param_values);
2027 if (current_tune->l1_cache_size >= 0)
2028 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2029 current_tune->l1_cache_size,
2030 global_options.x_param_values,
2031 global_options_set.x_param_values);
2033 /* Register global variables with the garbage collector. */
2034 arm_add_gc_roots ();
2038 arm_add_gc_roots (void)
2040 gcc_obstack_init(&minipool_obstack);
2041 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2044 /* A table of known ARM exception types.
2045 For use with the interrupt function attribute. */
2049 const char *const arg;
2050 const unsigned long return_value;
2054 static const isr_attribute_arg isr_attribute_args [] =
2056 { "IRQ", ARM_FT_ISR },
2057 { "irq", ARM_FT_ISR },
2058 { "FIQ", ARM_FT_FIQ },
2059 { "fiq", ARM_FT_FIQ },
2060 { "ABORT", ARM_FT_ISR },
2061 { "abort", ARM_FT_ISR },
2062 { "ABORT", ARM_FT_ISR },
2063 { "abort", ARM_FT_ISR },
2064 { "UNDEF", ARM_FT_EXCEPTION },
2065 { "undef", ARM_FT_EXCEPTION },
2066 { "SWI", ARM_FT_EXCEPTION },
2067 { "swi", ARM_FT_EXCEPTION },
2068 { NULL, ARM_FT_NORMAL }
2071 /* Returns the (interrupt) function type of the current
2072 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2074 static unsigned long
2075 arm_isr_value (tree argument)
2077 const isr_attribute_arg * ptr;
2081 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2083 /* No argument - default to IRQ. */
2084 if (argument == NULL_TREE)
2087 /* Get the value of the argument. */
2088 if (TREE_VALUE (argument) == NULL_TREE
2089 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2090 return ARM_FT_UNKNOWN;
2092 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2094 /* Check it against the list of known arguments. */
2095 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2096 if (streq (arg, ptr->arg))
2097 return ptr->return_value;
2099 /* An unrecognized interrupt type. */
2100 return ARM_FT_UNKNOWN;
2103 /* Computes the type of the current function. */
2105 static unsigned long
2106 arm_compute_func_type (void)
2108 unsigned long type = ARM_FT_UNKNOWN;
2112 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2114 /* Decide if the current function is volatile. Such functions
2115 never return, and many memory cycles can be saved by not storing
2116 register values that will never be needed again. This optimization
2117 was added to speed up context switching in a kernel application. */
2119 && (TREE_NOTHROW (current_function_decl)
2120 || !(flag_unwind_tables
2122 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2123 && TREE_THIS_VOLATILE (current_function_decl))
2124 type |= ARM_FT_VOLATILE;
2126 if (cfun->static_chain_decl != NULL)
2127 type |= ARM_FT_NESTED;
2129 attr = DECL_ATTRIBUTES (current_function_decl);
2131 a = lookup_attribute ("naked", attr);
2133 type |= ARM_FT_NAKED;
2135 a = lookup_attribute ("isr", attr);
2137 a = lookup_attribute ("interrupt", attr);
2140 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2142 type |= arm_isr_value (TREE_VALUE (a));
2147 /* Returns the type of the current function. */
2150 arm_current_func_type (void)
2152 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2153 cfun->machine->func_type = arm_compute_func_type ();
2155 return cfun->machine->func_type;
2159 arm_allocate_stack_slots_for_args (void)
2161 /* Naked functions should not allocate stack slots for arguments. */
2162 return !IS_NAKED (arm_current_func_type ());
2166 /* Output assembler code for a block containing the constant parts
2167 of a trampoline, leaving space for the variable parts.
2169 On the ARM, (if r8 is the static chain regnum, and remembering that
2170 referencing pc adds an offset of 8) the trampoline looks like:
2173 .word static chain value
2174 .word function's address
2175 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2178 arm_asm_trampoline_template (FILE *f)
2182 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2183 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2185 else if (TARGET_THUMB2)
2187 /* The Thumb-2 trampoline is similar to the arm implementation.
2188 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2189 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2190 STATIC_CHAIN_REGNUM, PC_REGNUM);
2191 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2195 ASM_OUTPUT_ALIGN (f, 2);
2196 fprintf (f, "\t.code\t16\n");
2197 fprintf (f, ".Ltrampoline_start:\n");
2198 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2199 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2200 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2201 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2202 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2203 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2205 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2206 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2209 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2212 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2214 rtx fnaddr, mem, a_tramp;
2216 emit_block_move (m_tramp, assemble_trampoline_template (),
2217 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2219 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2220 emit_move_insn (mem, chain_value);
2222 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2223 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2224 emit_move_insn (mem, fnaddr);
2226 a_tramp = XEXP (m_tramp, 0);
2227 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2228 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2229 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2232 /* Thumb trampolines should be entered in thumb mode, so set
2233 the bottom bit of the address. */
2236 arm_trampoline_adjust_address (rtx addr)
2239 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2240 NULL, 0, OPTAB_LIB_WIDEN);
2244 /* Return 1 if it is possible to return using a single instruction.
2245 If SIBLING is non-null, this is a test for a return before a sibling
2246 call. SIBLING is the call insn, so we can examine its register usage. */
2249 use_return_insn (int iscond, rtx sibling)
2252 unsigned int func_type;
2253 unsigned long saved_int_regs;
2254 unsigned HOST_WIDE_INT stack_adjust;
2255 arm_stack_offsets *offsets;
2257 /* Never use a return instruction before reload has run. */
2258 if (!reload_completed)
2261 func_type = arm_current_func_type ();
2263 /* Naked, volatile and stack alignment functions need special
2265 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2268 /* So do interrupt functions that use the frame pointer and Thumb
2269 interrupt functions. */
2270 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2273 offsets = arm_get_frame_offsets ();
2274 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2276 /* As do variadic functions. */
2277 if (crtl->args.pretend_args_size
2278 || cfun->machine->uses_anonymous_args
2279 /* Or if the function calls __builtin_eh_return () */
2280 || crtl->calls_eh_return
2281 /* Or if the function calls alloca */
2282 || cfun->calls_alloca
2283 /* Or if there is a stack adjustment. However, if the stack pointer
2284 is saved on the stack, we can use a pre-incrementing stack load. */
2285 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2286 && stack_adjust == 4)))
2289 saved_int_regs = offsets->saved_regs_mask;
2291 /* Unfortunately, the insn
2293 ldmib sp, {..., sp, ...}
2295 triggers a bug on most SA-110 based devices, such that the stack
2296 pointer won't be correctly restored if the instruction takes a
2297 page fault. We work around this problem by popping r3 along with
2298 the other registers, since that is never slower than executing
2299 another instruction.
2301 We test for !arm_arch5 here, because code for any architecture
2302 less than this could potentially be run on one of the buggy
2304 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2306 /* Validate that r3 is a call-clobbered register (always true in
2307 the default abi) ... */
2308 if (!call_used_regs[3])
2311 /* ... that it isn't being used for a return value ... */
2312 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2315 /* ... or for a tail-call argument ... */
2318 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2320 if (find_regno_fusage (sibling, USE, 3))
2324 /* ... and that there are no call-saved registers in r0-r2
2325 (always true in the default ABI). */
2326 if (saved_int_regs & 0x7)
2330 /* Can't be done if interworking with Thumb, and any registers have been
2332 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2335 /* On StrongARM, conditional returns are expensive if they aren't
2336 taken and multiple registers have been stacked. */
2337 if (iscond && arm_tune_strongarm)
2339 /* Conditional return when just the LR is stored is a simple
2340 conditional-load instruction, that's not expensive. */
2341 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2345 && arm_pic_register != INVALID_REGNUM
2346 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2350 /* If there are saved registers but the LR isn't saved, then we need
2351 two instructions for the return. */
2352 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2355 /* Can't be done if any of the FPA regs are pushed,
2356 since this also requires an insn. */
2357 if (TARGET_HARD_FLOAT && TARGET_FPA)
2358 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2359 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2362 /* Likewise VFP regs. */
2363 if (TARGET_HARD_FLOAT && TARGET_VFP)
2364 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2365 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2368 if (TARGET_REALLY_IWMMXT)
2369 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2370 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2376 /* Return TRUE if int I is a valid immediate ARM constant. */
2379 const_ok_for_arm (HOST_WIDE_INT i)
2383 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2384 be all zero, or all one. */
2385 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2386 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2387 != ((~(unsigned HOST_WIDE_INT) 0)
2388 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2391 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2393 /* Fast return for 0 and small values. We must do this for zero, since
2394 the code below can't handle that one case. */
2395 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2398 /* Get the number of trailing zeros. */
2399 lowbit = ffs((int) i) - 1;
2401 /* Only even shifts are allowed in ARM mode so round down to the
2402 nearest even number. */
2406 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2411 /* Allow rotated constants in ARM mode. */
2413 && ((i & ~0xc000003f) == 0
2414 || (i & ~0xf000000f) == 0
2415 || (i & ~0xfc000003) == 0))
2422 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2425 if (i == v || i == (v | (v << 8)))
2428 /* Allow repeated pattern 0xXY00XY00. */
2438 /* Return true if I is a valid constant for the operation CODE. */
2440 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2442 if (const_ok_for_arm (i))
2448 /* See if we can use movw. */
2449 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2452 /* Otherwise, try mvn. */
2453 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2456 /* See if we can use addw or subw. */
2458 && ((i & 0xfffff000) == 0
2459 || ((-i) & 0xfffff000) == 0))
2461 /* else fall through. */
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2483 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2489 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2493 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2500 /* Emit a sequence of insns to handle a large constant.
2501 CODE is the code of the operation required, it can be any of SET, PLUS,
2502 IOR, AND, XOR, MINUS;
2503 MODE is the mode in which the operation is being performed;
2504 VAL is the integer to operate on;
2505 SOURCE is the other operand (a register, or a null-pointer for SET);
2506 SUBTARGETS means it is safe to create scratch registers if that will
2507 either produce a simpler sequence, or we will want to cse the values.
2508 Return value is the number of insns emitted. */
2510 /* ??? Tweak this for thumb2. */
2512 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2513 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2517 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2518 cond = COND_EXEC_TEST (PATTERN (insn));
2522 if (subtargets || code == SET
2523 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2524 && REGNO (target) != REGNO (source)))
2526 /* After arm_reorg has been called, we can't fix up expensive
2527 constants by pushing them into memory so we must synthesize
2528 them in-line, regardless of the cost. This is only likely to
2529 be more costly on chips that have load delay slots and we are
2530 compiling without running the scheduler (so no splitting
2531 occurred before the final instruction emission).
2533 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2535 if (!after_arm_reorg
2537 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2539 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2544 /* Currently SET is the only monadic value for CODE, all
2545 the rest are diadic. */
2546 if (TARGET_USE_MOVT)
2547 arm_emit_movpair (target, GEN_INT (val));
2549 emit_set_insn (target, GEN_INT (val));
2555 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2557 if (TARGET_USE_MOVT)
2558 arm_emit_movpair (temp, GEN_INT (val));
2560 emit_set_insn (temp, GEN_INT (val));
2562 /* For MINUS, the value is subtracted from, since we never
2563 have subtraction of a constant. */
2565 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2567 emit_set_insn (target,
2568 gen_rtx_fmt_ee (code, mode, source, temp));
2574 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2578 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2579 ARM/THUMB2 immediates, and add up to VAL.
2580 Thr function return value gives the number of insns required. */
2582 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2583 struct four_ints *return_sequence)
2585 int best_consecutive_zeros = 0;
2589 struct four_ints tmp_sequence;
2591 /* If we aren't targetting ARM, the best place to start is always at
2592 the bottom, otherwise look more closely. */
2595 for (i = 0; i < 32; i += 2)
2597 int consecutive_zeros = 0;
2599 if (!(val & (3 << i)))
2601 while ((i < 32) && !(val & (3 << i)))
2603 consecutive_zeros += 2;
2606 if (consecutive_zeros > best_consecutive_zeros)
2608 best_consecutive_zeros = consecutive_zeros;
2609 best_start = i - consecutive_zeros;
2616 /* So long as it won't require any more insns to do so, it's
2617 desirable to emit a small constant (in bits 0...9) in the last
2618 insn. This way there is more chance that it can be combined with
2619 a later addressing insn to form a pre-indexed load or store
2620 operation. Consider:
2622 *((volatile int *)0xe0000100) = 1;
2623 *((volatile int *)0xe0000110) = 2;
2625 We want this to wind up as:
2629 str rB, [rA, #0x100]
2631 str rB, [rA, #0x110]
2633 rather than having to synthesize both large constants from scratch.
2635 Therefore, we calculate how many insns would be required to emit
2636 the constant starting from `best_start', and also starting from
2637 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2638 yield a shorter sequence, we may as well use zero. */
2639 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2641 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2643 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2644 if (insns2 <= insns1)
2646 *return_sequence = tmp_sequence;
2654 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2656 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2657 struct four_ints *return_sequence, int i)
2659 int remainder = val & 0xffffffff;
2662 /* Try and find a way of doing the job in either two or three
2665 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2666 location. We start at position I. This may be the MSB, or
2667 optimial_immediate_sequence may have positioned it at the largest block
2668 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2669 wrapping around to the top of the word when we drop off the bottom.
2670 In the worst case this code should produce no more than four insns.
2672 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2673 constants, shifted to any arbitrary location. We should always start
2678 unsigned int b1, b2, b3, b4;
2679 unsigned HOST_WIDE_INT result;
2682 gcc_assert (insns < 4);
2687 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2688 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2691 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2692 /* We can use addw/subw for the last 12 bits. */
2696 /* Use an 8-bit shifted/rotated immediate. */
2700 result = remainder & ((0x0ff << end)
2701 | ((i < end) ? (0xff >> (32 - end))
2708 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2709 arbitrary shifts. */
2710 i -= TARGET_ARM ? 2 : 1;
2714 /* Next, see if we can do a better job with a thumb2 replicated
2717 We do it this way around to catch the cases like 0x01F001E0 where
2718 two 8-bit immediates would work, but a replicated constant would
2721 TODO: 16-bit constants that don't clear all the bits, but still win.
2722 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2725 b1 = (remainder & 0xff000000) >> 24;
2726 b2 = (remainder & 0x00ff0000) >> 16;
2727 b3 = (remainder & 0x0000ff00) >> 8;
2728 b4 = remainder & 0xff;
2732 /* The 8-bit immediate already found clears b1 (and maybe b2),
2733 but must leave b3 and b4 alone. */
2735 /* First try to find a 32-bit replicated constant that clears
2736 almost everything. We can assume that we can't do it in one,
2737 or else we wouldn't be here. */
2738 unsigned int tmp = b1 & b2 & b3 & b4;
2739 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2741 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2742 + (tmp == b3) + (tmp == b4);
2744 && (matching_bytes >= 3
2745 || (matching_bytes == 2
2746 && const_ok_for_op (remainder & ~tmp2, code))))
2748 /* At least 3 of the bytes match, and the fourth has at
2749 least as many bits set, or two of the bytes match
2750 and it will only require one more insn to finish. */
2758 /* Second, try to find a 16-bit replicated constant that can
2759 leave three of the bytes clear. If b2 or b4 is already
2760 zero, then we can. If the 8-bit from above would not
2761 clear b2 anyway, then we still win. */
2762 else if (b1 == b3 && (!b2 || !b4
2763 || (remainder & 0x00ff0000 & ~result)))
2765 result = remainder & 0xff00ff00;
2771 /* The 8-bit immediate already found clears b2 (and maybe b3)
2772 and we don't get here unless b1 is alredy clear, but it will
2773 leave b4 unchanged. */
2775 /* If we can clear b2 and b4 at once, then we win, since the
2776 8-bits couldn't possibly reach that far. */
2779 result = remainder & 0x00ff00ff;
2785 return_sequence->i[insns++] = result;
2786 remainder &= ~result;
2788 if (code == SET || code == MINUS)
2796 /* Emit an instruction with the indicated PATTERN. If COND is
2797 non-NULL, conditionalize the execution of the instruction on COND
2801 emit_constant_insn (rtx cond, rtx pattern)
2804 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2805 emit_insn (pattern);
2808 /* As above, but extra parameter GENERATE which, if clear, suppresses
2812 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2813 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2818 int final_invert = 0;
2820 int set_sign_bit_copies = 0;
2821 int clear_sign_bit_copies = 0;
2822 int clear_zero_bit_copies = 0;
2823 int set_zero_bit_copies = 0;
2824 int insns = 0, neg_insns, inv_insns;
2825 unsigned HOST_WIDE_INT temp1, temp2;
2826 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2827 struct four_ints *immediates;
2828 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2830 /* Find out which operations are safe for a given CODE. Also do a quick
2831 check for degenerate cases; these can occur when DImode operations
2844 if (remainder == 0xffffffff)
2847 emit_constant_insn (cond,
2848 gen_rtx_SET (VOIDmode, target,
2849 GEN_INT (ARM_SIGN_EXTEND (val))));
2855 if (reload_completed && rtx_equal_p (target, source))
2859 emit_constant_insn (cond,
2860 gen_rtx_SET (VOIDmode, target, source));
2869 emit_constant_insn (cond,
2870 gen_rtx_SET (VOIDmode, target, const0_rtx));
2873 if (remainder == 0xffffffff)
2875 if (reload_completed && rtx_equal_p (target, source))
2878 emit_constant_insn (cond,
2879 gen_rtx_SET (VOIDmode, target, source));
2888 if (reload_completed && rtx_equal_p (target, source))
2891 emit_constant_insn (cond,
2892 gen_rtx_SET (VOIDmode, target, source));
2896 if (remainder == 0xffffffff)
2899 emit_constant_insn (cond,
2900 gen_rtx_SET (VOIDmode, target,
2901 gen_rtx_NOT (mode, source)));
2908 /* We treat MINUS as (val - source), since (source - val) is always
2909 passed as (source + (-val)). */
2913 emit_constant_insn (cond,
2914 gen_rtx_SET (VOIDmode, target,
2915 gen_rtx_NEG (mode, source)));
2918 if (const_ok_for_arm (val))
2921 emit_constant_insn (cond,
2922 gen_rtx_SET (VOIDmode, target,
2923 gen_rtx_MINUS (mode, GEN_INT (val),
2934 /* If we can do it in one insn get out quickly. */
2935 if (const_ok_for_op (val, code))
2938 emit_constant_insn (cond,
2939 gen_rtx_SET (VOIDmode, target,
2941 ? gen_rtx_fmt_ee (code, mode, source,
2947 /* Calculate a few attributes that may be useful for specific
2949 /* Count number of leading zeros. */
2950 for (i = 31; i >= 0; i--)
2952 if ((remainder & (1 << i)) == 0)
2953 clear_sign_bit_copies++;
2958 /* Count number of leading 1's. */
2959 for (i = 31; i >= 0; i--)
2961 if ((remainder & (1 << i)) != 0)
2962 set_sign_bit_copies++;
2967 /* Count number of trailing zero's. */
2968 for (i = 0; i <= 31; i++)
2970 if ((remainder & (1 << i)) == 0)
2971 clear_zero_bit_copies++;
2976 /* Count number of trailing 1's. */
2977 for (i = 0; i <= 31; i++)
2979 if ((remainder & (1 << i)) != 0)
2980 set_zero_bit_copies++;
2988 /* See if we can do this by sign_extending a constant that is known
2989 to be negative. This is a good, way of doing it, since the shift
2990 may well merge into a subsequent insn. */
2991 if (set_sign_bit_copies > 1)
2993 if (const_ok_for_arm
2994 (temp1 = ARM_SIGN_EXTEND (remainder
2995 << (set_sign_bit_copies - 1))))
2999 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3000 emit_constant_insn (cond,
3001 gen_rtx_SET (VOIDmode, new_src,
3003 emit_constant_insn (cond,
3004 gen_ashrsi3 (target, new_src,
3005 GEN_INT (set_sign_bit_copies - 1)));
3009 /* For an inverted constant, we will need to set the low bits,
3010 these will be shifted out of harm's way. */
3011 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3012 if (const_ok_for_arm (~temp1))
3016 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3017 emit_constant_insn (cond,
3018 gen_rtx_SET (VOIDmode, new_src,
3020 emit_constant_insn (cond,
3021 gen_ashrsi3 (target, new_src,
3022 GEN_INT (set_sign_bit_copies - 1)));
3028 /* See if we can calculate the value as the difference between two
3029 valid immediates. */
3030 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3032 int topshift = clear_sign_bit_copies & ~1;
3034 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3035 & (0xff000000 >> topshift));
3037 /* If temp1 is zero, then that means the 9 most significant
3038 bits of remainder were 1 and we've caused it to overflow.
3039 When topshift is 0 we don't need to do anything since we
3040 can borrow from 'bit 32'. */
3041 if (temp1 == 0 && topshift != 0)
3042 temp1 = 0x80000000 >> (topshift - 1);
3044 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3046 if (const_ok_for_arm (temp2))
3050 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3051 emit_constant_insn (cond,
3052 gen_rtx_SET (VOIDmode, new_src,
3054 emit_constant_insn (cond,
3055 gen_addsi3 (target, new_src,
3063 /* See if we can generate this by setting the bottom (or the top)
3064 16 bits, and then shifting these into the other half of the
3065 word. We only look for the simplest cases, to do more would cost
3066 too much. Be careful, however, not to generate this when the
3067 alternative would take fewer insns. */
3068 if (val & 0xffff0000)
3070 temp1 = remainder & 0xffff0000;
3071 temp2 = remainder & 0x0000ffff;
3073 /* Overlaps outside this range are best done using other methods. */
3074 for (i = 9; i < 24; i++)
3076 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3077 && !const_ok_for_arm (temp2))
3079 rtx new_src = (subtargets
3080 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3082 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3083 source, subtargets, generate);
3091 gen_rtx_ASHIFT (mode, source,
3098 /* Don't duplicate cases already considered. */
3099 for (i = 17; i < 24; i++)
3101 if (((temp1 | (temp1 >> i)) == remainder)
3102 && !const_ok_for_arm (temp1))
3104 rtx new_src = (subtargets
3105 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3107 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3108 source, subtargets, generate);
3113 gen_rtx_SET (VOIDmode, target,
3116 gen_rtx_LSHIFTRT (mode, source,
3127 /* If we have IOR or XOR, and the constant can be loaded in a
3128 single instruction, and we can find a temporary to put it in,
3129 then this can be done in two instructions instead of 3-4. */
3131 /* TARGET can't be NULL if SUBTARGETS is 0 */
3132 || (reload_completed && !reg_mentioned_p (target, source)))
3134 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3138 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3140 emit_constant_insn (cond,
3141 gen_rtx_SET (VOIDmode, sub,
3143 emit_constant_insn (cond,
3144 gen_rtx_SET (VOIDmode, target,
3145 gen_rtx_fmt_ee (code, mode,
3156 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3157 and the remainder 0s for e.g. 0xfff00000)
3158 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3160 This can be done in 2 instructions by using shifts with mov or mvn.
3165 mvn r0, r0, lsr #12 */
3166 if (set_sign_bit_copies > 8
3167 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3171 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3172 rtx shift = GEN_INT (set_sign_bit_copies);
3176 gen_rtx_SET (VOIDmode, sub,
3178 gen_rtx_ASHIFT (mode,
3183 gen_rtx_SET (VOIDmode, target,
3185 gen_rtx_LSHIFTRT (mode, sub,
3192 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3194 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3196 For eg. r0 = r0 | 0xfff
3201 if (set_zero_bit_copies > 8
3202 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3206 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3207 rtx shift = GEN_INT (set_zero_bit_copies);
3211 gen_rtx_SET (VOIDmode, sub,
3213 gen_rtx_LSHIFTRT (mode,
3218 gen_rtx_SET (VOIDmode, target,
3220 gen_rtx_ASHIFT (mode, sub,
3226 /* This will never be reached for Thumb2 because orn is a valid
3227 instruction. This is for Thumb1 and the ARM 32 bit cases.
3229 x = y | constant (such that ~constant is a valid constant)
3231 x = ~(~y & ~constant).
3233 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3237 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3238 emit_constant_insn (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_NOT (mode, source)));
3243 sub = gen_reg_rtx (mode);
3244 emit_constant_insn (cond,
3245 gen_rtx_SET (VOIDmode, sub,
3246 gen_rtx_AND (mode, source,
3248 emit_constant_insn (cond,
3249 gen_rtx_SET (VOIDmode, target,
3250 gen_rtx_NOT (mode, sub)));
3257 /* See if two shifts will do 2 or more insn's worth of work. */
3258 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3260 HOST_WIDE_INT shift_mask = ((0xffffffff
3261 << (32 - clear_sign_bit_copies))
3264 if ((remainder | shift_mask) != 0xffffffff)
3268 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3269 insns = arm_gen_constant (AND, mode, cond,
3270 remainder | shift_mask,
3271 new_src, source, subtargets, 1);
3276 rtx targ = subtargets ? NULL_RTX : target;
3277 insns = arm_gen_constant (AND, mode, cond,
3278 remainder | shift_mask,
3279 targ, source, subtargets, 0);
3285 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3286 rtx shift = GEN_INT (clear_sign_bit_copies);
3288 emit_insn (gen_ashlsi3 (new_src, source, shift));
3289 emit_insn (gen_lshrsi3 (target, new_src, shift));
3295 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3297 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3299 if ((remainder | shift_mask) != 0xffffffff)
3303 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3305 insns = arm_gen_constant (AND, mode, cond,
3306 remainder | shift_mask,
3307 new_src, source, subtargets, 1);
3312 rtx targ = subtargets ? NULL_RTX : target;
3314 insns = arm_gen_constant (AND, mode, cond,
3315 remainder | shift_mask,
3316 targ, source, subtargets, 0);
3322 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3323 rtx shift = GEN_INT (clear_zero_bit_copies);
3325 emit_insn (gen_lshrsi3 (new_src, source, shift));
3326 emit_insn (gen_ashlsi3 (target, new_src, shift));
3338 /* Calculate what the instruction sequences would be if we generated it
3339 normally, negated, or inverted. */
3341 /* AND cannot be split into multiple insns, so invert and use BIC. */
3344 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3347 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3352 if (can_invert || final_invert)
3353 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3358 immediates = &pos_immediates;
3360 /* Is the negated immediate sequence more efficient? */
3361 if (neg_insns < insns && neg_insns <= inv_insns)
3364 immediates = &neg_immediates;
3369 /* Is the inverted immediate sequence more efficient?
3370 We must allow for an extra NOT instruction for XOR operations, although
3371 there is some chance that the final 'mvn' will get optimized later. */
3372 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3375 immediates = &inv_immediates;
3383 /* Now output the chosen sequence as instructions. */
3386 for (i = 0; i < insns; i++)
3388 rtx new_src, temp1_rtx;
3390 temp1 = immediates->i[i];
3392 if (code == SET || code == MINUS)
3393 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3394 else if ((final_invert || i < (insns - 1)) && subtargets)
3395 new_src = gen_reg_rtx (mode);
3401 else if (can_negate)
3404 temp1 = trunc_int_for_mode (temp1, mode);
3405 temp1_rtx = GEN_INT (temp1);
3409 else if (code == MINUS)
3410 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3412 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3414 emit_constant_insn (cond,
3415 gen_rtx_SET (VOIDmode, new_src,
3421 can_negate = can_invert;
3425 else if (code == MINUS)
3433 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3434 gen_rtx_NOT (mode, source)));
3441 /* Canonicalize a comparison so that we are more likely to recognize it.
3442 This can be done for a few constant compares, where we can make the
3443 immediate value easier to load. */
3446 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3448 enum machine_mode mode;
3449 unsigned HOST_WIDE_INT i, maxval;
3451 mode = GET_MODE (*op0);
3452 if (mode == VOIDmode)
3453 mode = GET_MODE (*op1);
3455 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3457 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3458 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3459 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3460 for GTU/LEU in Thumb mode. */
3465 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3467 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3470 if (code == GT || code == LE
3471 || (!TARGET_ARM && (code == GTU || code == LEU)))
3473 /* Missing comparison. First try to use an available
3475 if (GET_CODE (*op1) == CONST_INT)
3483 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3485 *op1 = GEN_INT (i + 1);
3486 return code == GT ? GE : LT;
3491 if (i != ~((unsigned HOST_WIDE_INT) 0)
3492 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3494 *op1 = GEN_INT (i + 1);
3495 return code == GTU ? GEU : LTU;
3503 /* If that did not work, reverse the condition. */
3507 return swap_condition (code);
3513 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3514 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3515 to facilitate possible combining with a cmp into 'ands'. */
3517 && GET_CODE (*op0) == ZERO_EXTEND
3518 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3519 && GET_MODE (XEXP (*op0, 0)) == QImode
3520 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3521 && subreg_lowpart_p (XEXP (*op0, 0))
3522 && *op1 == const0_rtx)
3523 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3526 /* Comparisons smaller than DImode. Only adjust comparisons against
3527 an out-of-range constant. */
3528 if (GET_CODE (*op1) != CONST_INT
3529 || const_ok_for_arm (INTVAL (*op1))
3530 || const_ok_for_arm (- INTVAL (*op1)))
3544 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3546 *op1 = GEN_INT (i + 1);
3547 return code == GT ? GE : LT;
3554 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3556 *op1 = GEN_INT (i - 1);
3557 return code == GE ? GT : LE;
3563 if (i != ~((unsigned HOST_WIDE_INT) 0)
3564 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3566 *op1 = GEN_INT (i + 1);
3567 return code == GTU ? GEU : LTU;
3574 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3576 *op1 = GEN_INT (i - 1);
3577 return code == GEU ? GTU : LEU;
3589 /* Define how to find the value returned by a function. */
3592 arm_function_value(const_tree type, const_tree func,
3593 bool outgoing ATTRIBUTE_UNUSED)
3595 enum machine_mode mode;
3596 int unsignedp ATTRIBUTE_UNUSED;
3597 rtx r ATTRIBUTE_UNUSED;
3599 mode = TYPE_MODE (type);
3601 if (TARGET_AAPCS_BASED)
3602 return aapcs_allocate_return_reg (mode, type, func);
3604 /* Promote integer types. */
3605 if (INTEGRAL_TYPE_P (type))
3606 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3608 /* Promotes small structs returned in a register to full-word size
3609 for big-endian AAPCS. */
3610 if (arm_return_in_msb (type))
3612 HOST_WIDE_INT size = int_size_in_bytes (type);
3613 if (size % UNITS_PER_WORD != 0)
3615 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3616 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3620 return arm_libcall_value_1 (mode);
3624 libcall_eq (const void *p1, const void *p2)
3626 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3630 libcall_hash (const void *p1)
3632 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3636 add_libcall (htab_t htab, rtx libcall)
3638 *htab_find_slot (htab, libcall, INSERT) = libcall;
3642 arm_libcall_uses_aapcs_base (const_rtx libcall)
3644 static bool init_done = false;
3645 static htab_t libcall_htab;
3651 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3653 add_libcall (libcall_htab,
3654 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3655 add_libcall (libcall_htab,
3656 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3657 add_libcall (libcall_htab,
3658 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3659 add_libcall (libcall_htab,
3660 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3668 add_libcall (libcall_htab,
3669 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3671 add_libcall (libcall_htab,
3672 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3673 add_libcall (libcall_htab,
3674 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3675 add_libcall (libcall_htab,
3676 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3677 add_libcall (libcall_htab,
3678 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3679 add_libcall (libcall_htab,
3680 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3681 add_libcall (libcall_htab,
3682 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3683 add_libcall (libcall_htab,
3684 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3685 add_libcall (libcall_htab,
3686 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3688 /* Values from double-precision helper functions are returned in core
3689 registers if the selected core only supports single-precision
3690 arithmetic, even if we are using the hard-float ABI. The same is
3691 true for single-precision helpers, but we will never be using the
3692 hard-float ABI on a CPU which doesn't support single-precision
3693 operations in hardware. */
3694 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3695 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3696 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3697 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3698 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3699 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3700 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3701 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3702 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3703 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3704 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3705 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3707 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3711 return libcall && htab_find (libcall_htab, libcall) != NULL;
3715 arm_libcall_value_1 (enum machine_mode mode)
3717 if (TARGET_AAPCS_BASED)
3718 return aapcs_libcall_value (mode);
3719 else if (TARGET_32BIT
3720 && TARGET_HARD_FLOAT_ABI
3722 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3723 return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
3724 else if (TARGET_32BIT
3725 && TARGET_HARD_FLOAT_ABI
3727 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3728 return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3729 else if (TARGET_IWMMXT_ABI
3730 && arm_vector_mode_supported_p (mode))
3731 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3733 return gen_rtx_REG (mode, ARG_REGISTER (1));
3736 /* Define how to find the value returned by a library function
3737 assuming the value has mode MODE. */
3740 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3742 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3743 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3745 /* The following libcalls return their result in integer registers,
3746 even though they return a floating point value. */
3747 if (arm_libcall_uses_aapcs_base (libcall))
3748 return gen_rtx_REG (mode, ARG_REGISTER(1));
3752 return arm_libcall_value_1 (mode);
3755 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3758 arm_function_value_regno_p (const unsigned int regno)
3760 if (regno == ARG_REGISTER (1)
3762 && TARGET_AAPCS_BASED
3764 && TARGET_HARD_FLOAT
3765 && regno == FIRST_VFP_REGNUM)
3767 && TARGET_HARD_FLOAT_ABI
3769 && regno == FIRST_CIRRUS_FP_REGNUM)
3770 || (TARGET_IWMMXT_ABI
3771 && regno == FIRST_IWMMXT_REGNUM)
3773 && TARGET_HARD_FLOAT_ABI
3775 && regno == FIRST_FPA_REGNUM))
3781 /* Determine the amount of memory needed to store the possible return
3782 registers of an untyped call. */
3784 arm_apply_result_size (void)
3790 if (TARGET_HARD_FLOAT_ABI)
3796 if (TARGET_MAVERICK)
3799 if (TARGET_IWMMXT_ABI)
3806 /* Decide whether TYPE should be returned in memory (true)
3807 or in a register (false). FNTYPE is the type of the function making
3810 arm_return_in_memory (const_tree type, const_tree fntype)
3814 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3816 if (TARGET_AAPCS_BASED)
3818 /* Simple, non-aggregate types (ie not including vectors and
3819 complex) are always returned in a register (or registers).
3820 We don't care about which register here, so we can short-cut
3821 some of the detail. */
3822 if (!AGGREGATE_TYPE_P (type)
3823 && TREE_CODE (type) != VECTOR_TYPE
3824 && TREE_CODE (type) != COMPLEX_TYPE)
3827 /* Any return value that is no larger than one word can be
3829 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3832 /* Check any available co-processors to see if they accept the
3833 type as a register candidate (VFP, for example, can return
3834 some aggregates in consecutive registers). These aren't
3835 available if the call is variadic. */
3836 if (aapcs_select_return_coproc (type, fntype) >= 0)
3839 /* Vector values should be returned using ARM registers, not
3840 memory (unless they're over 16 bytes, which will break since
3841 we only have four call-clobbered registers to play with). */
3842 if (TREE_CODE (type) == VECTOR_TYPE)
3843 return (size < 0 || size > (4 * UNITS_PER_WORD));
3845 /* The rest go in memory. */
3849 if (TREE_CODE (type) == VECTOR_TYPE)
3850 return (size < 0 || size > (4 * UNITS_PER_WORD));
3852 if (!AGGREGATE_TYPE_P (type) &&
3853 (TREE_CODE (type) != VECTOR_TYPE))
3854 /* All simple types are returned in registers. */
3857 if (arm_abi != ARM_ABI_APCS)
3859 /* ATPCS and later return aggregate types in memory only if they are
3860 larger than a word (or are variable size). */
3861 return (size < 0 || size > UNITS_PER_WORD);
3864 /* For the arm-wince targets we choose to be compatible with Microsoft's
3865 ARM and Thumb compilers, which always return aggregates in memory. */
3867 /* All structures/unions bigger than one word are returned in memory.
3868 Also catch the case where int_size_in_bytes returns -1. In this case
3869 the aggregate is either huge or of variable size, and in either case
3870 we will want to return it via memory and not in a register. */
3871 if (size < 0 || size > UNITS_PER_WORD)
3874 if (TREE_CODE (type) == RECORD_TYPE)
3878 /* For a struct the APCS says that we only return in a register
3879 if the type is 'integer like' and every addressable element
3880 has an offset of zero. For practical purposes this means
3881 that the structure can have at most one non bit-field element
3882 and that this element must be the first one in the structure. */
3884 /* Find the first field, ignoring non FIELD_DECL things which will
3885 have been created by C++. */
3886 for (field = TYPE_FIELDS (type);
3887 field && TREE_CODE (field) != FIELD_DECL;
3888 field = DECL_CHAIN (field))
3892 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3894 /* Check that the first field is valid for returning in a register. */
3896 /* ... Floats are not allowed */
3897 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3900 /* ... Aggregates that are not themselves valid for returning in
3901 a register are not allowed. */
3902 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3905 /* Now check the remaining fields, if any. Only bitfields are allowed,
3906 since they are not addressable. */
3907 for (field = DECL_CHAIN (field);
3909 field = DECL_CHAIN (field))
3911 if (TREE_CODE (field) != FIELD_DECL)
3914 if (!DECL_BIT_FIELD_TYPE (field))
3921 if (TREE_CODE (type) == UNION_TYPE)
3925 /* Unions can be returned in registers if every element is
3926 integral, or can be returned in an integer register. */
3927 for (field = TYPE_FIELDS (type);
3929 field = DECL_CHAIN (field))
3931 if (TREE_CODE (field) != FIELD_DECL)
3934 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3937 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3943 #endif /* not ARM_WINCE */
3945 /* Return all other types in memory. */
3949 /* Indicate whether or not words of a double are in big-endian order. */
3952 arm_float_words_big_endian (void)
3954 if (TARGET_MAVERICK)
3957 /* For FPA, float words are always big-endian. For VFP, floats words
3958 follow the memory system mode. */
3966 return (TARGET_BIG_END ? 1 : 0);
3971 const struct pcs_attribute_arg
3975 } pcs_attribute_args[] =
3977 {"aapcs", ARM_PCS_AAPCS},
3978 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3980 /* We could recognize these, but changes would be needed elsewhere
3981 * to implement them. */
3982 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3983 {"atpcs", ARM_PCS_ATPCS},
3984 {"apcs", ARM_PCS_APCS},
3986 {NULL, ARM_PCS_UNKNOWN}
3990 arm_pcs_from_attribute (tree attr)
3992 const struct pcs_attribute_arg *ptr;
3995 /* Get the value of the argument. */
3996 if (TREE_VALUE (attr) == NULL_TREE
3997 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3998 return ARM_PCS_UNKNOWN;
4000 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4002 /* Check it against the list of known arguments. */
4003 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4004 if (streq (arg, ptr->arg))
4007 /* An unrecognized interrupt type. */
4008 return ARM_PCS_UNKNOWN;
4011 /* Get the PCS variant to use for this call. TYPE is the function's type
4012 specification, DECL is the specific declartion. DECL may be null if
4013 the call could be indirect or if this is a library call. */
4015 arm_get_pcs_model (const_tree type, const_tree decl)
4017 bool user_convention = false;
4018 enum arm_pcs user_pcs = arm_pcs_default;
4023 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4026 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4027 user_convention = true;
4030 if (TARGET_AAPCS_BASED)
4032 /* Detect varargs functions. These always use the base rules
4033 (no argument is ever a candidate for a co-processor register). */
4035 bool base_rules = stdarg_p (type);
4037 if (user_convention)
4039 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4040 sorry ("non-AAPCS derived PCS variant");
4041 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4042 error ("variadic functions must use the base AAPCS variant");
4046 return ARM_PCS_AAPCS;
4047 else if (user_convention)
4049 else if (decl && flag_unit_at_a_time)
4051 /* Local functions never leak outside this compilation unit,
4052 so we are free to use whatever conventions are appropriate. */
4054 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4055 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4057 return ARM_PCS_AAPCS_LOCAL;
4060 else if (user_convention && user_pcs != arm_pcs_default)
4061 sorry ("PCS variant");
4063 /* For everything else we use the target's default. */
4064 return arm_pcs_default;
4069 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4070 const_tree fntype ATTRIBUTE_UNUSED,
4071 rtx libcall ATTRIBUTE_UNUSED,
4072 const_tree fndecl ATTRIBUTE_UNUSED)
4074 /* Record the unallocated VFP registers. */
4075 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4076 pcum->aapcs_vfp_reg_alloc = 0;
4079 /* Walk down the type tree of TYPE counting consecutive base elements.
4080 If *MODEP is VOIDmode, then set it to the first valid floating point
4081 type. If a non-floating point type is found, or if a floating point
4082 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4083 otherwise return the count in the sub-tree. */
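/* A worked example (editorial): for
       struct hfa { float a, b, c; };
   the first field sets *MODEP to SFmode and each field contributes a
   count of 1, so the function returns 3 and the struct qualifies as a
   homogeneous floating-point aggregate. Adding an int member would
   make a sub-call return -1 and disqualify the whole type.  */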
4085 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4087 enum machine_mode mode;
4090 switch (TREE_CODE (type))
4093 mode = TYPE_MODE (type);
4094 if (mode != DFmode && mode != SFmode)
4097 if (*modep == VOIDmode)
4106 mode = TYPE_MODE (TREE_TYPE (type));
4107 if (mode != DFmode && mode != SFmode)
4110 if (*modep == VOIDmode)
4119 /* Use V2SImode and V4SImode as representatives of all 64-bit
4120 and 128-bit vector types, whether or not those modes are
4121 supported with the present options. */
4122 size = int_size_in_bytes (type);
4135 if (*modep == VOIDmode)
4138 /* Vector modes are considered to be opaque: two vectors are
4139 equivalent for the purposes of being homogeneous aggregates
4140 if they are the same size. */
4149 tree index = TYPE_DOMAIN (type);
4151 /* Can't handle incomplete types. */
4152 if (!COMPLETE_TYPE_P(type))
4155 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4158 || !TYPE_MAX_VALUE (index)
4159 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4160 || !TYPE_MIN_VALUE (index)
4161 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4165 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4166 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4168 /* There must be no padding. */
4169 if (!host_integerp (TYPE_SIZE (type), 1)
4170 || (tree_low_cst (TYPE_SIZE (type), 1)
4171 != count * GET_MODE_BITSIZE (*modep)))
4183 /* Can't handle incomplete types. */
4184 if (!COMPLETE_TYPE_P(type))
4187 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4189 if (TREE_CODE (field) != FIELD_DECL)
4192 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4198 /* There must be no padding. */
4199 if (!host_integerp (TYPE_SIZE (type), 1)
4200 || (tree_low_cst (TYPE_SIZE (type), 1)
4201 != count * GET_MODE_BITSIZE (*modep)))
4208 case QUAL_UNION_TYPE:
4210 /* These aren't very interesting except in a degenerate case. */
4215 /* Can't handle incomplete types. */
4216 if (!COMPLETE_TYPE_P(type))
4219 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4221 if (TREE_CODE (field) != FIELD_DECL)
4224 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4227 count = count > sub_count ? count : sub_count;
4230 /* There must be no padding. */
4231 if (!host_integerp (TYPE_SIZE (type), 1)
4232 || (tree_low_cst (TYPE_SIZE (type), 1)
4233 != count * GET_MODE_BITSIZE (*modep)))
4246 /* Return true if PCS_VARIANT should use VFP registers. */
4248 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4250 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4252 static bool seen_thumb1_vfp = false;
4254 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4256 sorry ("Thumb-1 hard-float VFP ABI");
4257 /* sorry() is not immediately fatal, so only display this once. */
4258 seen_thumb1_vfp = true;
4264 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4267 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4268 (TARGET_VFP_DOUBLE || !is_double));
4271 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4272 suitable for passing or returning in VFP registers for the PCS
4273 variant selected. If it is, then *BASE_MODE is updated to contain
4274 a machine mode describing each element of the argument's type and
4275 *COUNT to hold the number of such elements. */
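/* For illustration (editorial): a double _Complex argument has mode
   DCmode, so the MODE_COMPLEX_FLOAT case below yields *BASE_MODE ==
   DFmode and *COUNT == 2; it is handled like a homogeneous aggregate
   of two doubles.  */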
4277 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4278 enum machine_mode mode, const_tree type,
4279 enum machine_mode *base_mode, int *count)
4281 enum machine_mode new_mode = VOIDmode;
4283 /* If we have the type information, prefer that to working things
4284 out from the mode. */
4287 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4289 if (ag_count > 0 && ag_count <= 4)
4294 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4295 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4296 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4301 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4304 new_mode = (mode == DCmode ? DFmode : SFmode);
4310 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4313 *base_mode = new_mode;
4318 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4319 enum machine_mode mode, const_tree type)
4321 int count ATTRIBUTE_UNUSED;
4322 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4324 if (!use_vfp_abi (pcs_variant, false))
4326 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4331 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4334 if (!use_vfp_abi (pcum->pcs_variant, false))
4337 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4338 &pcum->aapcs_vfp_rmode,
4339 &pcum->aapcs_vfp_rcount);
4343 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4344 const_tree type ATTRIBUTE_UNUSED)
4346 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4347 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
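/* Worked example (editorial): for rmode == DFmode and rcount == 2,
   shift == 8/4 == 2 and mask == (1 << 4) - 1 == 0xf, so the loop
   below searches the free-register bitmap for four consecutive free
   S registers starting at an even regno, i.e. the pairs d0/d1,
   d2/d3, and so on.  */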
4350 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4351 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4353 pcum->aapcs_vfp_reg_alloc = mask << regno;
4354 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4357 int rcount = pcum->aapcs_vfp_rcount;
4359 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4363 /* Avoid using unsupported vector modes. */
4364 if (rmode == V2SImode)
4366 else if (rmode == V4SImode)
4373 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4374 for (i = 0; i < rcount; i++)
4376 rtx tmp = gen_rtx_REG (rmode,
4377 FIRST_VFP_REGNUM + regno + i * rshift);
4378 tmp = gen_rtx_EXPR_LIST
4380 GEN_INT (i * GET_MODE_SIZE (rmode)));
4381 XVECEXP (par, 0, i) = tmp;
4384 pcum->aapcs_reg = par;
4387 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4394 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4395 enum machine_mode mode,
4396 const_tree type ATTRIBUTE_UNUSED)
4398 if (!use_vfp_abi (pcs_variant, false))
4401 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4404 enum machine_mode ag_mode;
4409 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4414 if (ag_mode == V2SImode)
4416 else if (ag_mode == V4SImode)
4422 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4423 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4424 for (i = 0; i < count; i++)
4426 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4427 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4428 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4429 XVECEXP (par, 0, i) = tmp;
4435 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4439 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4440 enum machine_mode mode ATTRIBUTE_UNUSED,
4441 const_tree type ATTRIBUTE_UNUSED)
4443 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4444 pcum->aapcs_vfp_reg_alloc = 0;
4448 #define AAPCS_CP(X) \
4450 aapcs_ ## X ## _cum_init, \
4451 aapcs_ ## X ## _is_call_candidate, \
4452 aapcs_ ## X ## _allocate, \
4453 aapcs_ ## X ## _is_return_candidate, \
4454 aapcs_ ## X ## _allocate_return_reg, \
4455 aapcs_ ## X ## _advance \
4458 /* Table of co-processors that can be used to pass arguments in
4459 registers. Ideally no argument should be a candidate for more than
4460 one co-processor table entry, but the table is processed in order
4461 and stops after the first match. If that entry then fails to put
4462 the argument into a co-processor register, the argument will go on the stack. */
4466 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4467 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4469 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4470 BLKmode) is a candidate for this co-processor's registers; this
4471 function should ignore any position-dependent state in
4472 CUMULATIVE_ARGS and only use call-type dependent information. */
4473 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4475 /* Return true if the argument does get a co-processor register; it
4476 should set aapcs_reg to an RTX of the register allocated as is
4477 required for a return from FUNCTION_ARG. */
4478 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4480 /* Return true if a result of mode MODE (or type TYPE if MODE is
4481 BLKmode) can be returned in this co-processor's registers. */
4482 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4484 /* Allocate and return an RTX element to hold the return type of a
4485 call. This routine must not fail and will only be called if
4486 is_return_candidate returned true with the same parameters. */
4487 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4489 /* Finish processing this argument and prepare to start processing the next one. */
4491 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4492 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4500 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4505 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4506 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4513 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4515 /* We aren't passed a decl, so we can't check that a call is local.
4516 However, it isn't clear that that would be a win anyway, since it
4517 might limit some tail-calling opportunities. */
4518 enum arm_pcs pcs_variant;
4522 const_tree fndecl = NULL_TREE;
4524 if (TREE_CODE (fntype) == FUNCTION_DECL)
4527 fntype = TREE_TYPE (fntype);
4530 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4533 pcs_variant = arm_pcs_default;
4535 if (pcs_variant != ARM_PCS_AAPCS)
4539 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4540 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4549 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4552 /* We aren't passed a decl, so we can't check that a call is local.
4553 However, it isn't clear that that would be a win anyway, since it
4554 might limit some tail-calling opportunities. */
4555 enum arm_pcs pcs_variant;
4556 int unsignedp ATTRIBUTE_UNUSED;
4560 const_tree fndecl = NULL_TREE;
4562 if (TREE_CODE (fntype) == FUNCTION_DECL)
4565 fntype = TREE_TYPE (fntype);
4568 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4571 pcs_variant = arm_pcs_default;
4573 /* Promote integer types. */
4574 if (type && INTEGRAL_TYPE_P (type))
4575 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4577 if (pcs_variant != ARM_PCS_AAPCS)
4581 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4582 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4584 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4588 /* Promotes small structs returned in a register to full-word size
4589 for big-endian AAPCS. */
4590 if (type && arm_return_in_msb (type))
4592 HOST_WIDE_INT size = int_size_in_bytes (type);
4593 if (size % UNITS_PER_WORD != 0)
4595 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4596 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
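/* For example (editorial): a 3-byte struct is padded up to size == 4
   and returned as a full SImode word; on a big-endian target the
   value then sits in the most significant bits, as arm_return_in_msb
   requires.  */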
4600 return gen_rtx_REG (mode, R0_REGNUM);
4604 aapcs_libcall_value (enum machine_mode mode)
4606 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4607 && GET_MODE_SIZE (mode) <= 4)
4610 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4613 /* Lay out a function argument using the AAPCS rules. The rule
4614 numbers referred to here are those in the AAPCS. */
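/* A worked example of these rules (editorial): for
       f (int a, long long b, int c)
   with all core registers free, a takes r0 (C4); b needs doubleword
   alignment, so C3 rounds the NCRN from 1 up to 2 and b takes r2/r3
   (C4); c then finds no core registers left and goes on the stack
   (C6 followed by C7/C8).  */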
4616 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4617 const_tree type, bool named)
4622 /* We only need to do this once per argument. */
4623 if (pcum->aapcs_arg_processed)
4626 pcum->aapcs_arg_processed = true;
4628 /* Special case: if named is false then we are handling an incoming
4629 anonymous argument which is on the stack. */
4633 /* Is this a potential co-processor register candidate? */
4634 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4636 int slot = aapcs_select_call_coproc (pcum, mode, type);
4637 pcum->aapcs_cprc_slot = slot;
4639 /* We don't have to apply any of the rules from part B of the
4640 preparation phase; these are handled elsewhere in the compiler. */
4645 /* A co-processor register candidate goes either in its own
4646 class of registers or on the stack. */
4647 if (!pcum->aapcs_cprc_failed[slot])
4649 /* C1.cp - Try to allocate the argument to co-processor registers. */
4651 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4654 /* C2.cp - Put the argument on the stack and note that we
4655 can't assign any more candidates in this slot. We also
4656 need to note that we have allocated stack space, so that
4657 we won't later try to split a non-cprc candidate between
4658 core registers and the stack. */
4659 pcum->aapcs_cprc_failed[slot] = true;
4660 pcum->can_split = false;
4663 /* We didn't get a register, so this argument goes on the stack. */
4665 gcc_assert (pcum->can_split == false);
4670 /* C3 - For double-word aligned arguments, round the NCRN up to the
4671 next even number. */
4672 ncrn = pcum->aapcs_ncrn;
4673 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4676 nregs = ARM_NUM_REGS2(mode, type);
4678 /* Sigh, this test should really assert that nregs > 0, but a GCC
4679 extension allows empty structs and then gives them empty size; it
4680 then allows such a structure to be passed by value. For some of
4681 the code below we have to pretend that such an argument has
4682 non-zero size so that we 'locate' it correctly either in
4683 registers or on the stack. */
4684 gcc_assert (nregs >= 0);
4686 nregs2 = nregs ? nregs : 1;
4688 /* C4 - Argument fits entirely in core registers. */
4689 if (ncrn + nregs2 <= NUM_ARG_REGS)
4691 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4692 pcum->aapcs_next_ncrn = ncrn + nregs;
4696 /* C5 - Some core registers left and there are no arguments already
4697 on the stack: split this argument between the remaining core
4698 registers and the stack. */
4699 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4701 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4702 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4703 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4707 /* C6 - NCRN is set to 4. */
4708 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4710 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4714 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4715 for a call to a function whose data type is FNTYPE.
4716 For a library call, FNTYPE is NULL. */
4718 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4720 tree fndecl ATTRIBUTE_UNUSED)
4722 /* Long call handling. */
4724 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4726 pcum->pcs_variant = arm_pcs_default;
4728 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4730 if (arm_libcall_uses_aapcs_base (libname))
4731 pcum->pcs_variant = ARM_PCS_AAPCS;
4733 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4734 pcum->aapcs_reg = NULL_RTX;
4735 pcum->aapcs_partial = 0;
4736 pcum->aapcs_arg_processed = false;
4737 pcum->aapcs_cprc_slot = -1;
4738 pcum->can_split = true;
4740 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4744 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4746 pcum->aapcs_cprc_failed[i] = false;
4747 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4755 /* On the ARM, the offset starts at 0. */
4757 pcum->iwmmxt_nregs = 0;
4758 pcum->can_split = true;
4760 /* Varargs vectors are treated the same as long long.
4761 named_count avoids having to change the way arm handles 'named'. */
4762 pcum->named_count = 0;
4765 if (TARGET_REALLY_IWMMXT && fntype)
4769 for (fn_arg = TYPE_ARG_TYPES (fntype);
4771 fn_arg = TREE_CHAIN (fn_arg))
4772 pcum->named_count += 1;
4774 if (! pcum->named_count)
4775 pcum->named_count = INT_MAX;
4780 /* Return true if mode/type need doubleword alignment. */
4782 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4784 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4785 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
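/* For example (editorial): with the usual 32-bit PARM_BOUNDARY, a
   DImode or DFmode argument (64-bit alignment) needs doubleword
   alignment, while SImode and SFmode arguments do not.  */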
4789 /* Determine where to put an argument to a function.
4790 Value is zero to push the argument on the stack,
4791 or a hard register in which to store the argument.
4793 MODE is the argument's machine mode.
4794 TYPE is the data type of the argument (as a tree).
4795 This is null for libcalls where that information may not be available.
4797 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4798 the preceding args and about the function being called.
4799 NAMED is nonzero if this argument is a named parameter
4800 (otherwise it is an extra parameter matching an ellipsis).
4802 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4803 other arguments are passed on the stack. If (NAMED == 0) (which happens
4804 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4805 defined), say it is passed in the stack (function_prologue will
4806 indeed make it pass in the stack if necessary). */
4809 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4810 const_tree type, bool named)
4812 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4815 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4816 a call insn (op3 of a call_value insn). */
4817 if (mode == VOIDmode)
4820 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4822 aapcs_layout_arg (pcum, mode, type, named);
4823 return pcum->aapcs_reg;
4826 /* Varargs vectors are treated the same as long long.
4827 named_count avoids having to change the way arm handles 'named'. */
4828 if (TARGET_IWMMXT_ABI
4829 && arm_vector_mode_supported_p (mode)
4830 && pcum->named_count > pcum->nargs + 1)
4832 if (pcum->iwmmxt_nregs <= 9)
4833 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4836 pcum->can_split = false;
4841 /* Put doubleword aligned quantities in even register pairs. */
4843 && ARM_DOUBLEWORD_ALIGN
4844 && arm_needs_doubleword_align (mode, type))
4847 /* Only allow splitting an arg between regs and memory if all preceding
4848 args were allocated to regs. For args passed by reference we only count
4849 the reference pointer. */
4850 if (pcum->can_split)
4853 nregs = ARM_NUM_REGS2 (mode, type);
4855 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4858 return gen_rtx_REG (mode, pcum->nregs);
4862 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4864 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4865 ? DOUBLEWORD_ALIGNMENT
4870 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4871 tree type, bool named)
4873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4874 int nregs = pcum->nregs;
4876 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4878 aapcs_layout_arg (pcum, mode, type, named);
4879 return pcum->aapcs_partial;
4882 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4885 if (NUM_ARG_REGS > nregs
4886 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4888 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
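/* Worked example for the path above (editorial): with three argument
   registers already used (nregs == 3), a DImode argument needing
   ARM_NUM_REGS2 == 2 registers satisfies 4 > 3 and 4 < 3 + 2, so,
   assuming the argument may be split, 4 bytes travel in r3 and the
   remaining 4 bytes go on the stack.  */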
4893 /* Update the data in PCUM to advance over an argument
4894 of mode MODE and data type TYPE.
4895 (TYPE is null for libcalls where that information may not be available.) */
4898 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4899 const_tree type, bool named)
4901 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4903 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4905 aapcs_layout_arg (pcum, mode, type, named);
4907 if (pcum->aapcs_cprc_slot >= 0)
4909 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4911 pcum->aapcs_cprc_slot = -1;
4914 /* Generic stuff. */
4915 pcum->aapcs_arg_processed = false;
4916 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4917 pcum->aapcs_reg = NULL_RTX;
4918 pcum->aapcs_partial = 0;
4923 if (arm_vector_mode_supported_p (mode)
4924 && pcum->named_count > pcum->nargs
4925 && TARGET_IWMMXT_ABI)
4926 pcum->iwmmxt_nregs += 1;
4928 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4932 /* Variable sized types are passed by reference. This is a GCC
4933 extension to the ARM ABI. */
4936 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4937 enum machine_mode mode ATTRIBUTE_UNUSED,
4938 const_tree type, bool named ATTRIBUTE_UNUSED)
4940 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
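/* For example (editorial, GNU C): a local struct containing a
   variable-length array, say struct vs { char buf[n]; }, has a
   TYPE_SIZE that is not an INTEGER_CST, so an object of that type
   passed by value is in fact passed by reference.  */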
4943 /* Encode the current state of the #pragma [no_]long_calls. */
4946 OFF, /* No #pragma [no_]long_calls is in effect. */
4947 LONG, /* #pragma long_calls is in effect. */
4948 SHORT /* #pragma no_long_calls is in effect. */
4951 static arm_pragma_enum arm_pragma_long_calls = OFF;
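/* Illustrative user source driving this state (editorial):

       #pragma long_calls
       void far_func (void);      declared while LONG is in effect
       #pragma no_long_calls
       void near_func (void);     declared while SHORT is in effect
       #pragma long_calls_off     back to OFF

   arm_set_default_type_attributes below attaches the corresponding
   long_call/short_call attribute to each function type declared while
   a pragma is active.  */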
4954 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4956 arm_pragma_long_calls = LONG;
4960 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4962 arm_pragma_long_calls = SHORT;
4966 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4968 arm_pragma_long_calls = OFF;
4971 /* Handle an attribute requiring a FUNCTION_DECL;
4972 arguments as in struct attribute_spec.handler. */
4974 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4975 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4977 if (TREE_CODE (*node) != FUNCTION_DECL)
4979 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4981 *no_add_attrs = true;
4987 /* Handle an "interrupt" or "isr" attribute;
4988 arguments as in struct attribute_spec.handler. */
4990 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4995 if (TREE_CODE (*node) != FUNCTION_DECL)
4997 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4999 *no_add_attrs = true;
5001 /* FIXME: the argument if any is checked for type attributes;
5002 should it be checked for decl ones? */
5006 if (TREE_CODE (*node) == FUNCTION_TYPE
5007 || TREE_CODE (*node) == METHOD_TYPE)
5009 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5011 warning (OPT_Wattributes, "%qE attribute ignored",
5013 *no_add_attrs = true;
5016 else if (TREE_CODE (*node) == POINTER_TYPE
5017 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5018 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5019 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5021 *node = build_variant_type_copy (*node);
5022 TREE_TYPE (*node) = build_type_attribute_variant
5024 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5025 *no_add_attrs = true;
5029 /* Possibly pass this attribute on from the type to a decl. */
5030 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5031 | (int) ATTR_FLAG_FUNCTION_NEXT
5032 | (int) ATTR_FLAG_ARRAY_NEXT))
5034 *no_add_attrs = true;
5035 return tree_cons (name, args, NULL_TREE);
5039 warning (OPT_Wattributes, "%qE attribute ignored",
5048 /* Handle a "pcs" attribute; arguments as in struct
5049 attribute_spec.handler. */
5051 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5052 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5054 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5056 warning (OPT_Wattributes, "%qE attribute ignored", name);
5057 *no_add_attrs = true;
5062 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5063 /* Handle the "notshared" attribute. This attribute is another way of
5064 requesting hidden visibility. ARM's compiler supports
5065 "__declspec(notshared)"; we support the same thing via an
5069 arm_handle_notshared_attribute (tree *node,
5070 tree name ATTRIBUTE_UNUSED,
5071 tree args ATTRIBUTE_UNUSED,
5072 int flags ATTRIBUTE_UNUSED,
5075 tree decl = TYPE_NAME (*node);
5079 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5080 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5081 *no_add_attrs = false;
5087 /* Return 0 if the attributes for two types are incompatible, 1 if they
5088 are compatible, and 2 if they are nearly compatible (which causes a
5089 warning to be generated). */
5091 arm_comp_type_attributes (const_tree type1, const_tree type2)
5095 /* Check for mismatch of non-default calling convention. */
5096 if (TREE_CODE (type1) != FUNCTION_TYPE)
5099 /* Check for mismatched call attributes. */
5100 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5101 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5102 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5103 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5105 /* Only bother to check if an attribute is defined. */
5106 if (l1 | l2 | s1 | s2)
5108 /* If one type has an attribute, the other must have the same attribute. */
5109 if ((l1 != l2) || (s1 != s2))
5112 /* Disallow mixed attributes. */
5113 if ((l1 & s2) || (l2 & s1))
5117 /* Check for mismatched ISR attribute. */
5118 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5120 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5121 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5123 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5130 /* Assigns default attributes to a newly defined type. This is used to
5131 set short_call/long_call attributes for function types of
5132 functions defined inside corresponding #pragma scopes. */
5134 arm_set_default_type_attributes (tree type)
5136 /* Add __attribute__ ((long_call)) to all functions, when
5137 inside #pragma long_calls or __attribute__ ((short_call)),
5138 when inside #pragma no_long_calls. */
5139 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5141 tree type_attr_list, attr_name;
5142 type_attr_list = TYPE_ATTRIBUTES (type);
5144 if (arm_pragma_long_calls == LONG)
5145 attr_name = get_identifier ("long_call");
5146 else if (arm_pragma_long_calls == SHORT)
5147 attr_name = get_identifier ("short_call");
5151 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5152 TYPE_ATTRIBUTES (type) = type_attr_list;
5156 /* Return true if DECL is known to be linked into section SECTION. */
5159 arm_function_in_section_p (tree decl, section *section)
5161 /* We can only be certain about functions defined in the same
5162 compilation unit. */
5163 if (!TREE_STATIC (decl))
5166 /* Make sure that SYMBOL always binds to the definition in this
5167 compilation unit. */
5168 if (!targetm.binds_local_p (decl))
5171 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5172 if (!DECL_SECTION_NAME (decl))
5174 /* Make sure that we will not create a unique section for DECL. */
5175 if (flag_function_sections || DECL_ONE_ONLY (decl))
5179 return function_section (decl) == section;
5182 /* Return nonzero if a 32-bit "long_call" should be generated for
5183 a call from the current function to DECL. We generate a long_call
5186 a. has an __attribute__ ((long_call))
5187 or b. is within the scope of a #pragma long_calls
5188 or c. the -mlong-calls command line switch has been specified
5190 However we do not generate a long call if the function:
5192 d. has an __attribute__ ((short_call))
5193 or e. is inside the scope of a #pragma no_long_calls
5194 or f. is defined in the same section as the current function. */
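/* For instance (editorial): under -mlong-calls every call defaults to
   a long call (rule c), but
       void callee (void) __attribute__ ((short_call));
   is still reached with a plain BL (rule d), as is a callee known to
   be in the same section as the caller (rule f).  */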
5197 arm_is_long_call_p (tree decl)
5202 return TARGET_LONG_CALLS;
5204 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5205 if (lookup_attribute ("short_call", attrs))
5208 /* For "f", be conservative, and only cater for cases in which the
5209 whole of the current function is placed in the same section. */
5210 if (!flag_reorder_blocks_and_partition
5211 && TREE_CODE (decl) == FUNCTION_DECL
5212 && arm_function_in_section_p (decl, current_function_section ()))
5215 if (lookup_attribute ("long_call", attrs))
5218 return TARGET_LONG_CALLS;
5221 /* Return nonzero if it is ok to make a tail-call to DECL. */
5223 arm_function_ok_for_sibcall (tree decl, tree exp)
5225 unsigned long func_type;
5227 if (cfun->machine->sibcall_blocked)
5230 /* Never tailcall something for which we have no decl, or if we
5231 are generating code for Thumb-1. */
5232 if (decl == NULL || TARGET_THUMB1)
5235 /* The PIC register is live on entry to VxWorks PLT entries, so we
5236 must make the call before restoring the PIC register. */
5237 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5240 /* Cannot tail-call to long calls, since these are out of range of
5241 a branch instruction. */
5242 if (arm_is_long_call_p (decl))
5245 /* If we are interworking and the function is not declared static
5246 then we can't tail-call it unless we know that it exists in this
5247 compilation unit (since it might be a Thumb routine). */
5248 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5251 func_type = arm_current_func_type ();
5252 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5253 if (IS_INTERRUPT (func_type))
5256 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5258 /* Check that the return value locations are the same. For
5259 example that we aren't returning a value from the sibling in
5260 a VFP register but then need to transfer it to a core
5264 a = arm_function_value (TREE_TYPE (exp), decl, false);
5265 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5267 if (!rtx_equal_p (a, b))
5271 /* Never tailcall if function may be called with a misaligned SP. */
5272 if (IS_STACKALIGN (func_type))
5275 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5276 references should become a NOP. Don't convert such calls into sibling calls. */
5278 if (TARGET_AAPCS_BASED
5279 && arm_abi == ARM_ABI_AAPCS
5280 && DECL_WEAK (decl))
5283 /* Everything else is ok. */
5288 /* Addressing mode support functions. */
5290 /* Return nonzero if X is a legitimate immediate operand when compiling
5291 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5293 legitimate_pic_operand_p (rtx x)
5295 if (GET_CODE (x) == SYMBOL_REF
5296 || (GET_CODE (x) == CONST
5297 && GET_CODE (XEXP (x, 0)) == PLUS
5298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5304 /* Record that the current function needs a PIC register. Initialize
5305 cfun->machine->pic_reg if we have not already done so. */
5308 require_pic_register (void)
5310 /* A lot of the logic here is made obscure by the fact that this
5311 routine gets called as part of the rtx cost estimation process.
5312 We don't want those calls to affect any assumptions about the real
5313 function; and further, we can't call entry_of_function() until we
5314 start the real expansion process. */
5315 if (!crtl->uses_pic_offset_table)
5317 gcc_assert (can_create_pseudo_p ());
5318 if (arm_pic_register != INVALID_REGNUM)
5320 if (!cfun->machine->pic_reg)
5321 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5323 /* Play games to avoid marking the function as needing pic
5324 if we are being called as part of the cost-estimation process. */
5326 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5327 crtl->uses_pic_offset_table = 1;
5333 if (!cfun->machine->pic_reg)
5334 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5336 /* Play games to avoid marking the function as needing pic
5337 if we are being called as part of the cost-estimation process. */
5339 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5341 crtl->uses_pic_offset_table = 1;
5344 arm_load_pic_register (0UL);
5349 for (insn = seq; insn; insn = NEXT_INSN (insn))
5351 INSN_LOCATOR (insn) = prologue_locator;
5353 /* We can be called during expansion of PHI nodes, where
5354 we can't yet emit instructions directly in the final
5355 insn stream. Queue the insns on the entry edge, they will
5356 be committed after everything else is expanded. */
5357 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5364 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5366 if (GET_CODE (orig) == SYMBOL_REF
5367 || GET_CODE (orig) == LABEL_REF)
5373 gcc_assert (can_create_pseudo_p ());
5374 reg = gen_reg_rtx (Pmode);
5377 /* VxWorks does not impose a fixed gap between segments; the run-time
5378 gap can be different from the object-file gap. We therefore can't
5379 use GOTOFF unless we are absolutely sure that the symbol is in the
5380 same segment as the GOT. Unfortunately, the flexibility of linker
5381 scripts means that we can't be sure of that in general, so assume
5382 that GOTOFF is never valid on VxWorks. */
5383 if ((GET_CODE (orig) == LABEL_REF
5384 || (GET_CODE (orig) == SYMBOL_REF &&
5385 SYMBOL_REF_LOCAL_P (orig)))
5387 && !TARGET_VXWORKS_RTP)
5388 insn = arm_pic_static_addr (orig, reg);
5394 /* If this function doesn't have a pic register, create one now. */
5395 require_pic_register ();
5397 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5399 /* Make the MEM as close to a constant as possible. */
5400 mem = SET_SRC (pat);
5401 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5402 MEM_READONLY_P (mem) = 1;
5403 MEM_NOTRAP_P (mem) = 1;
5405 insn = emit_insn (pat);
5408 /* Put a REG_EQUAL note on this insn, so that it can be optimized by the loop pass. */
5410 set_unique_reg_note (insn, REG_EQUAL, orig);
5414 else if (GET_CODE (orig) == CONST)
5418 if (GET_CODE (XEXP (orig, 0)) == PLUS
5419 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5422 /* Handle the case where we have: const (UNSPEC_TLS). */
5423 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5424 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5427 /* Handle the case where we have:
5428 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5430 if (GET_CODE (XEXP (orig, 0)) == PLUS
5431 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5432 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5434 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5440 gcc_assert (can_create_pseudo_p ());
5441 reg = gen_reg_rtx (Pmode);
5444 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5446 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5447 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5448 base == reg ? 0 : reg);
5450 if (GET_CODE (offset) == CONST_INT)
5452 /* The base register doesn't really matter; we only want to
5453 test the index for the appropriate mode. */
5454 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5456 gcc_assert (can_create_pseudo_p ());
5457 offset = force_reg (Pmode, offset);
5460 if (GET_CODE (offset) == CONST_INT)
5461 return plus_constant (base, INTVAL (offset));
5464 if (GET_MODE_SIZE (mode) > 4
5465 && (GET_MODE_CLASS (mode) == MODE_INT
5466 || TARGET_SOFT_FLOAT))
5468 emit_insn (gen_addsi3 (reg, base, offset));
5472 return gen_rtx_PLUS (Pmode, base, offset);
5479 /* Find a spare register to use during the prologue of a function. */
5482 thumb_find_work_register (unsigned long pushed_regs_mask)
5486 /* Check the argument registers first as these are call-used. The
5487 register allocation order means that sometimes r3 might be used
5488 but earlier argument registers might not, so check them all. */
5489 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5490 if (!df_regs_ever_live_p (reg))
5493 /* Before going on to check the call-saved registers we can try a couple
5494 more ways of deducing that r3 is available. The first is when we are
5495 pushing anonymous arguments onto the stack and we have fewer than 4
5496 registers' worth of fixed arguments (*). In this case r3 will be part of
5497 the variable argument list and so we can be sure that it will be
5498 pushed right at the start of the function. Hence it will be available
5499 for the rest of the prologue.
5500 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5501 if (cfun->machine->uses_anonymous_args
5502 && crtl->args.pretend_args_size > 0)
5503 return LAST_ARG_REGNUM;
5505 /* The other case is when we have fixed arguments but fewer than 4 registers'
5506 worth. In this case r3 might be used in the body of the function, but
5507 it is not being used to convey an argument into the function. In theory
5508 we could just check crtl->args.size to see how many bytes are
5509 being passed in argument registers, but it seems that it is unreliable.
5510 Sometimes it will have the value 0 when in fact arguments are being
5511 passed. (See testcase execute/20021111-1.c for an example). So we also
5512 check the args_info.nregs field as well. The problem with this field is
5513 that it makes no allowances for arguments that are passed to the
5514 function but which are not used. Hence we could miss an opportunity
5515 when a function has an unused argument in r3. But it is better to be
5516 safe than to be sorry. */
5517 if (! cfun->machine->uses_anonymous_args
5518 && crtl->args.size >= 0
5519 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5520 && crtl->args.info.nregs < 4)
5521 return LAST_ARG_REGNUM;
5523 /* Otherwise look for a call-saved register that is going to be pushed. */
5524 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5525 if (pushed_regs_mask & (1 << reg))
5530 /* Thumb-2 can use high regs. */
5531 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5532 if (pushed_regs_mask & (1 << reg))
5535 /* Something went wrong - thumb_compute_save_reg_mask()
5536 should have arranged for a suitable register to be pushed. */
5540 static GTY(()) int pic_labelno;
5542 /* Generate code to load the PIC register. In thumb mode SCRATCH is a scratch register. */
5546 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5548 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5550 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5553 gcc_assert (flag_pic);
5555 pic_reg = cfun->machine->pic_reg;
5556 if (TARGET_VXWORKS_RTP)
5558 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5559 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5560 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5562 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5564 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5565 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5569 /* We use an UNSPEC rather than a LABEL_REF because this label
5570 never appears in the code stream. */
5572 labelno = GEN_INT (pic_labelno++);
5573 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5574 l1 = gen_rtx_CONST (VOIDmode, l1);
5576 /* On the ARM the PC register contains 'dot + 8' at the time of the
5577 addition, on the Thumb it is 'dot + 4'. */
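/* An illustrative sketch of the ARM-state sequence this expands to
   (editorial; register and label names are invented):

       ldr     rPIC, .LCPIC       @ rPIC = &GOT - (.LPICn + 8)
   .LPICn:
       add     rPIC, pc, rPIC     @ pc reads as .LPICn + 8 here

   hence the 8 (or 4 for Thumb) added to the label address below.  */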
5578 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5579 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5581 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5585 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5587 else /* TARGET_THUMB1 */
5589 if (arm_pic_register != INVALID_REGNUM
5590 && REGNO (pic_reg) > LAST_LO_REGNUM)
5592 /* We will have pushed the pic register, so we should always be
5593 able to find a work register. */
5594 pic_tmp = gen_rtx_REG (SImode,
5595 thumb_find_work_register (saved_regs));
5596 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5597 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5598 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5601 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5605 /* Need to emit this whether or not we obey regdecls,
5606 since setjmp/longjmp can cause life info to screw up. */
5610 /* Generate code to load the address of a static var when flag_pic is set. */
5612 arm_pic_static_addr (rtx orig, rtx reg)
5614 rtx l1, labelno, offset_rtx, insn;
5616 gcc_assert (flag_pic);
5618 /* We use an UNSPEC rather than a LABEL_REF because this label
5619 never appears in the code stream. */
5620 labelno = GEN_INT (pic_labelno++);
5621 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5622 l1 = gen_rtx_CONST (VOIDmode, l1);
5624 /* On the ARM the PC register contains 'dot + 8' at the time of the
5625 addition, on the Thumb it is 'dot + 4'. */
5626 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5627 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5628 UNSPEC_SYMBOL_OFFSET);
5629 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5631 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5635 /* Return nonzero if X is valid as an ARM state addressing register. */
5637 arm_address_register_rtx_p (rtx x, int strict_p)
5641 if (GET_CODE (x) != REG)
5647 return ARM_REGNO_OK_FOR_BASE_P (regno);
5649 return (regno <= LAST_ARM_REGNUM
5650 || regno >= FIRST_PSEUDO_REGISTER
5651 || regno == FRAME_POINTER_REGNUM
5652 || regno == ARG_POINTER_REGNUM);
5655 /* Return TRUE if this rtx is the difference of a symbol and a label,
5656 and will reduce to a PC-relative relocation in the object file.
5657 Expressions like this can be left alone when generating PIC, rather
5658 than forced through the GOT. */
5660 pcrel_constant_p (rtx x)
5662 if (GET_CODE (x) == MINUS)
5663 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5668 /* Return true if X will surely end up in an index register after next
5671 will_be_in_index_register (const_rtx x)
5673 /* arm.md: calculate_pic_address will split this into a register. */
5674 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5677 /* Return nonzero if X is a valid ARM state address operand. */
5679 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5683 enum rtx_code code = GET_CODE (x);
5685 if (arm_address_register_rtx_p (x, strict_p))
5688 use_ldrd = (TARGET_LDRD
5690 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5692 if (code == POST_INC || code == PRE_DEC
5693 || ((code == PRE_INC || code == POST_DEC)
5694 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5695 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5697 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5698 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5699 && GET_CODE (XEXP (x, 1)) == PLUS
5700 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5702 rtx addend = XEXP (XEXP (x, 1), 1);
5704 /* Don't allow ldrd post increment by register because it's hard
5705 to fix up invalid register choices. */
5707 && GET_CODE (x) == POST_MODIFY
5708 && GET_CODE (addend) == REG)
5711 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5712 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5715 /* After reload constants split into minipools will have addresses
5716 from a LABEL_REF. */
5717 else if (reload_completed
5718 && (code == LABEL_REF
5720 && GET_CODE (XEXP (x, 0)) == PLUS
5721 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5722 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5725 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5728 else if (code == PLUS)
5730 rtx xop0 = XEXP (x, 0);
5731 rtx xop1 = XEXP (x, 1);
5733 return ((arm_address_register_rtx_p (xop0, strict_p)
5734 && ((GET_CODE(xop1) == CONST_INT
5735 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5736 || (!strict_p && will_be_in_index_register (xop1))))
5737 || (arm_address_register_rtx_p (xop1, strict_p)
5738 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5742 /* Reload currently can't handle MINUS, so disable this for now */
5743 else if (GET_CODE (x) == MINUS)
5745 rtx xop0 = XEXP (x, 0);
5746 rtx xop1 = XEXP (x, 1);
5748 return (arm_address_register_rtx_p (xop0, strict_p)
5749 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5753 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5754 && code == SYMBOL_REF
5755 && CONSTANT_POOL_ADDRESS_P (x)
5757 && symbol_mentioned_p (get_pool_constant (x))
5758 && ! pcrel_constant_p (get_pool_constant (x))))
5764 /* Return nonzero if X is a valid Thumb-2 address operand. */
5766 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5769 enum rtx_code code = GET_CODE (x);
5771 if (arm_address_register_rtx_p (x, strict_p))
5774 use_ldrd = (TARGET_LDRD
5776 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5778 if (code == POST_INC || code == PRE_DEC
5779 || ((code == PRE_INC || code == POST_DEC)
5780 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5781 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5783 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5784 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5785 && GET_CODE (XEXP (x, 1)) == PLUS
5786 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5788 /* Thumb-2 only has autoincrement by constant. */
5789 rtx addend = XEXP (XEXP (x, 1), 1);
5790 HOST_WIDE_INT offset;
5792 if (GET_CODE (addend) != CONST_INT)
5795 offset = INTVAL(addend);
5796 if (GET_MODE_SIZE (mode) <= 4)
5797 return (offset > -256 && offset < 256);
5799 return (use_ldrd && offset > -1024 && offset < 1024
5800 && (offset & 3) == 0);
5803 /* After reload constants split into minipools will have addresses
5804 from a LABEL_REF. */
5805 else if (reload_completed
5806 && (code == LABEL_REF
5808 && GET_CODE (XEXP (x, 0)) == PLUS
5809 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5810 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5813 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5816 else if (code == PLUS)
5818 rtx xop0 = XEXP (x, 0);
5819 rtx xop1 = XEXP (x, 1);
5821 return ((arm_address_register_rtx_p (xop0, strict_p)
5822 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5823 || (!strict_p && will_be_in_index_register (xop1))))
5824 || (arm_address_register_rtx_p (xop1, strict_p)
5825 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5828 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5829 && code == SYMBOL_REF
5830 && CONSTANT_POOL_ADDRESS_P (x)
5832 && symbol_mentioned_p (get_pool_constant (x))
5833 && ! pcrel_constant_p (get_pool_constant (x))))
5839 /* Return nonzero if INDEX is valid for an address index operand in ARM state. */
5842 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5845 HOST_WIDE_INT range;
5846 enum rtx_code code = GET_CODE (index);
5848 /* Standard coprocessor addressing modes. */
5849 if (TARGET_HARD_FLOAT
5850 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5851 && (mode == SFmode || mode == DFmode
5852 || (TARGET_MAVERICK && mode == DImode)))
5853 return (code == CONST_INT && INTVAL (index) < 1024
5854 && INTVAL (index) > -1024
5855 && (INTVAL (index) & 3) == 0);
5857 /* For quad modes, we restrict the constant offset to be slightly less
5858 than what the instruction format permits. We do this because for
5859 quad mode moves, we will actually decompose them into two separate
5860 double-mode reads or writes. INDEX must therefore be a valid
5861 (double-mode) offset and so should INDEX+8. */
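/* Worked example (editorial): the largest offset accepted here is
   1012 (the highest multiple of 4 below 1016); the two double-mode
   halves then use offsets 1012 and 1012 + 8 == 1020, both still
   valid double-mode offsets.  */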
5862 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5863 return (code == CONST_INT
5864 && INTVAL (index) < 1016
5865 && INTVAL (index) > -1024
5866 && (INTVAL (index) & 3) == 0);
5868 /* We have no such constraint on double mode offsets, so we permit the
5869 full range of the instruction format. */
5870 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5871 return (code == CONST_INT
5872 && INTVAL (index) < 1024
5873 && INTVAL (index) > -1024
5874 && (INTVAL (index) & 3) == 0);
5876 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5877 return (code == CONST_INT
5878 && INTVAL (index) < 1024
5879 && INTVAL (index) > -1024
5880 && (INTVAL (index) & 3) == 0);
5882 if (arm_address_register_rtx_p (index, strict_p)
5883 && (GET_MODE_SIZE (mode) <= 4))
5886 if (mode == DImode || mode == DFmode)
5888 if (code == CONST_INT)
5890 HOST_WIDE_INT val = INTVAL (index);
5893 return val > -256 && val < 256;
5895 return val > -4096 && val < 4092;
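/* Editorial note: without LDRD the DImode access becomes two LDRs at
   offsets val and val + 4; a single LDR reaches +/-4095, so requiring
   val < 4092 keeps the second word's offset in range.  */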
5898 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5901 if (GET_MODE_SIZE (mode) <= 4
5905 || (mode == QImode && outer == SIGN_EXTEND))))
5909 rtx xiop0 = XEXP (index, 0);
5910 rtx xiop1 = XEXP (index, 1);
5912 return ((arm_address_register_rtx_p (xiop0, strict_p)
5913 && power_of_two_operand (xiop1, SImode))
5914 || (arm_address_register_rtx_p (xiop1, strict_p)
5915 && power_of_two_operand (xiop0, SImode)));
5917 else if (code == LSHIFTRT || code == ASHIFTRT
5918 || code == ASHIFT || code == ROTATERT)
5920 rtx op = XEXP (index, 1);
5922 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5923 && GET_CODE (op) == CONST_INT
5925 && INTVAL (op) <= 31);
5929 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5935 || (outer == SIGN_EXTEND && mode == QImode))
5941 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5943 return (code == CONST_INT
5944 && INTVAL (index) < range
5945 && INTVAL (index) > -range);
5948 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5949 index operand, i.e. 1, 2, 4 or 8. */
5951 thumb2_index_mul_operand (rtx op)
5955 if (GET_CODE(op) != CONST_INT)
5959 return (val == 1 || val == 2 || val == 4 || val == 8);
5962 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5964 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5966 enum rtx_code code = GET_CODE (index);
5968 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5969 /* Standard coprocessor addressing modes. */
5970 if (TARGET_HARD_FLOAT
5971 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5972 && (mode == SFmode || mode == DFmode
5973 || (TARGET_MAVERICK && mode == DImode)))
5974 return (code == CONST_INT && INTVAL (index) < 1024
5975 /* Thumb-2 allows only > -256 index range for its core register
5976 load/stores. Since we allow SF/DF in core registers, we have
5977 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5979 && INTVAL (index) > -256
5980 && (INTVAL (index) & 3) == 0);
5982 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5984 /* For DImode assume values will usually live in core regs
5985 and only allow LDRD addressing modes. */
5986 if (!TARGET_LDRD || mode != DImode)
5987 return (code == CONST_INT
5988 && INTVAL (index) < 1024
5989 && INTVAL (index) > -1024
5990 && (INTVAL (index) & 3) == 0);
5993 /* For quad modes, we restrict the constant offset to be slightly less
5994 than what the instruction format permits. We do this because for
5995 quad mode moves, we will actually decompose them into two separate
5996 double-mode reads or writes. INDEX must therefore be a valid
5997 (double-mode) offset and so should INDEX+8. */
5998 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5999 return (code == CONST_INT
6000 && INTVAL (index) < 1016
6001 && INTVAL (index) > -1024
6002 && (INTVAL (index) & 3) == 0);
6004 /* We have no such constraint on double mode offsets, so we permit the
6005 full range of the instruction format. */
6006 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6007 return (code == CONST_INT
6008 && INTVAL (index) < 1024
6009 && INTVAL (index) > -1024
6010 && (INTVAL (index) & 3) == 0);
6012 if (arm_address_register_rtx_p (index, strict_p)
6013 && (GET_MODE_SIZE (mode) <= 4))
6016 if (mode == DImode || mode == DFmode)
6018 if (code == CONST_INT)
6020 HOST_WIDE_INT val = INTVAL (index);
6021 /* ??? Can we assume ldrd for thumb2? */
6022 /* Thumb-2 ldrd only has reg+const addressing modes. */
6023 /* ldrd supports offsets of +-1020.
6024 However the ldr fallback does not. */
6025 return val > -256 && val < 256 && (val & 3) == 0;
6033 rtx xiop0 = XEXP (index, 0);
6034 rtx xiop1 = XEXP (index, 1);
6036 return ((arm_address_register_rtx_p (xiop0, strict_p)
6037 && thumb2_index_mul_operand (xiop1))
6038 || (arm_address_register_rtx_p (xiop1, strict_p)
6039 && thumb2_index_mul_operand (xiop0)));
6041 else if (code == ASHIFT)
6043 rtx op = XEXP (index, 1);
6045 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6046 && GET_CODE (op) == CONST_INT
6048 && INTVAL (op) <= 3);
6051 return (code == CONST_INT
6052 && INTVAL (index) < 4096
6053 && INTVAL (index) > -256);
6056 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6058 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6062 if (GET_CODE (x) != REG)
6068 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6070 return (regno <= LAST_LO_REGNUM
6071 || regno > LAST_VIRTUAL_REGISTER
6072 || regno == FRAME_POINTER_REGNUM
6073 || (GET_MODE_SIZE (mode) >= 4
6074 && (regno == STACK_POINTER_REGNUM
6075 || regno >= FIRST_PSEUDO_REGISTER
6076 || x == hard_frame_pointer_rtx
6077 || x == arg_pointer_rtx)));
6080 /* Return nonzero if x is a legitimate index register. This is the case
6081 for any base register that can access a QImode object. */
6083 thumb1_index_register_rtx_p (rtx x, int strict_p)
6085 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6088 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6090 The AP may be eliminated to either the SP or the FP, so we use the
6091 least common denominator, e.g. SImode, and offsets from 0 to 64.
6093 ??? Verify whether the above is the right approach.
6095 ??? Also, the FP may be eliminated to the SP, so perhaps that
6096 needs special handling also.
6098 ??? Look at how the mips16 port solves this problem. It probably uses
6099 better ways to solve some of these problems.
6101 Although it is not incorrect, we don't accept QImode and HImode
6102 addresses based on the frame pointer or arg pointer until the
6103 reload pass starts. This is so that eliminating such addresses
6104 into stack based ones won't produce impossible code. */
6106 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6108 /* ??? Not clear if this is right. Experiment. */
6109 if (GET_MODE_SIZE (mode) < 4
6110 && !(reload_in_progress || reload_completed)
6111 && (reg_mentioned_p (frame_pointer_rtx, x)
6112 || reg_mentioned_p (arg_pointer_rtx, x)
6113 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6114 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6115 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6116 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6119 /* Accept any base register. SP only in SImode or larger. */
6120 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6123 /* This is PC relative data before arm_reorg runs. */
6124 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6125 && GET_CODE (x) == SYMBOL_REF
6126 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6129 /* This is PC relative data after arm_reorg runs. */
6130 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6132 && (GET_CODE (x) == LABEL_REF
6133 || (GET_CODE (x) == CONST
6134 && GET_CODE (XEXP (x, 0)) == PLUS
6135 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6136 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6139 /* Post-inc indexing only supported for SImode and larger. */
6140 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6141 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6144 else if (GET_CODE (x) == PLUS)
6146 /* REG+REG address can be any two index registers. */
6147 /* We disallow FRAME+REG addressing since we know that FRAME
6148 will be replaced with STACK, and SP relative addressing only
6149 permits SP+OFFSET. */
6150 if (GET_MODE_SIZE (mode) <= 4
6151 && XEXP (x, 0) != frame_pointer_rtx
6152 && XEXP (x, 1) != frame_pointer_rtx
6153 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6154 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6155 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6158 /* REG+const has 5-7 bit offset for non-SP registers. */
6159 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6160 || XEXP (x, 0) == arg_pointer_rtx)
6161 && GET_CODE (XEXP (x, 1)) == CONST_INT
6162 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6165 /* REG+const has 10-bit offset for SP, but only SImode and
6166 larger are supported. */
6167 /* ??? Should probably check for DI/DFmode overflow here
6168 just like GO_IF_LEGITIMATE_OFFSET does. */
6169 else if (GET_CODE (XEXP (x, 0)) == REG
6170 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6171 && GET_MODE_SIZE (mode) >= 4
6172 && GET_CODE (XEXP (x, 1)) == CONST_INT
6173 && INTVAL (XEXP (x, 1)) >= 0
6174 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6175 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6178 else if (GET_CODE (XEXP (x, 0)) == REG
6179 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6180 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6181 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6182 && REGNO (XEXP (x, 0))
6183 <= LAST_VIRTUAL_POINTER_REGISTER))
6184 && GET_MODE_SIZE (mode) >= 4
6185 && GET_CODE (XEXP (x, 1)) == CONST_INT
6186 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6190 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6191 && GET_MODE_SIZE (mode) == 4
6192 && GET_CODE (x) == SYMBOL_REF
6193 && CONSTANT_POOL_ADDRESS_P (x)
6195 && symbol_mentioned_p (get_pool_constant (x))
6196 && ! pcrel_constant_p (get_pool_constant (x))))
6202 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6203 instruction of mode MODE. */
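/* Illustrative summary of the checks below: QImode permits offsets 0..31,
   HImode permits even offsets 0..62, and word-sized and larger modes permit
   word-aligned offsets with VAL + size <= 128 (so 0..124 for SImode).  */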
6205 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6207 switch (GET_MODE_SIZE (mode))
6210 return val >= 0 && val < 32;
6213 return val >= 0 && val < 64 && (val & 1) == 0;
6217 && (val + GET_MODE_SIZE (mode)) <= 128
6223 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6226 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6227 else if (TARGET_THUMB2)
6228 return thumb2_legitimate_address_p (mode, x, strict_p);
6229 else /* if (TARGET_THUMB1) */
6230 return thumb1_legitimate_address_p (mode, x, strict_p);
6233 /* Build the SYMBOL_REF for __tls_get_addr. */
6235 static GTY(()) rtx tls_get_addr_libfunc;
6238 get_tls_get_addr (void)
6240 if (!tls_get_addr_libfunc)
6241 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6242 return tls_get_addr_libfunc;
6246 arm_load_tp (rtx target)
6249 target = gen_reg_rtx (SImode);
6253 /* Can return in any reg. */
6254 emit_insn (gen_load_tp_hard (target));
6258 /* Always returned in r0. Immediately copy the result into a pseudo,
6259 otherwise other uses of r0 (e.g. setting up function arguments) may
6260 clobber the value. */
6264 emit_insn (gen_load_tp_soft ());
6266 tmp = gen_rtx_REG (SImode, 0);
6267 emit_move_insn (target, tmp);
6273 load_tls_operand (rtx x, rtx reg)
6277 if (reg == NULL_RTX)
6278 reg = gen_reg_rtx (SImode);
6280 tmp = gen_rtx_CONST (SImode, x);
6282 emit_move_insn (reg, tmp);
6288 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6290 rtx insns, label, labelno, sum;
6292 gcc_assert (reloc != TLS_DESCSEQ);
6295 labelno = GEN_INT (pic_labelno++);
6296 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6297 label = gen_rtx_CONST (VOIDmode, label);
6299 sum = gen_rtx_UNSPEC (Pmode,
6300 gen_rtvec (4, x, GEN_INT (reloc), label,
6301 GEN_INT (TARGET_ARM ? 8 : 4)),
6303 reg = load_tls_operand (sum, reg);
6306 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6308 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6310 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6311 LCT_PURE, /* LCT_CONST? */
6312 Pmode, 1, reg, Pmode);
6314 insns = get_insns ();
6321 arm_tls_descseq_addr (rtx x, rtx reg)
6323 rtx labelno = GEN_INT (pic_labelno++);
6324 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6325 rtx sum = gen_rtx_UNSPEC (Pmode,
6326 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6327 gen_rtx_CONST (VOIDmode, label),
6328 GEN_INT (!TARGET_ARM)),
6330 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6332 emit_insn (gen_tlscall (x, labelno));
6334 reg = gen_reg_rtx (SImode);
6336 gcc_assert (REGNO (reg) != 0);
6338 emit_move_insn (reg, reg0);
6344 legitimize_tls_address (rtx x, rtx reg)
6346 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6347 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6351 case TLS_MODEL_GLOBAL_DYNAMIC:
6352 if (TARGET_GNU2_TLS)
6354 reg = arm_tls_descseq_addr (x, reg);
6356 tp = arm_load_tp (NULL_RTX);
6358 dest = gen_rtx_PLUS (Pmode, tp, reg);
6362 /* Original scheme */
6363 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6364 dest = gen_reg_rtx (Pmode);
6365 emit_libcall_block (insns, dest, ret, x);
6369 case TLS_MODEL_LOCAL_DYNAMIC:
6370 if (TARGET_GNU2_TLS)
6372 reg = arm_tls_descseq_addr (x, reg);
6374 tp = arm_load_tp (NULL_RTX);
6376 dest = gen_rtx_PLUS (Pmode, tp, reg);
6380 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6382 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6383 share the LDM result with other LD model accesses. */
6384 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6386 dest = gen_reg_rtx (Pmode);
6387 emit_libcall_block (insns, dest, ret, eqv);
6389 /* Load the addend. */
6390 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6391 GEN_INT (TLS_LDO32)),
6393 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6394 dest = gen_rtx_PLUS (Pmode, dest, addend);
6398 case TLS_MODEL_INITIAL_EXEC:
6399 labelno = GEN_INT (pic_labelno++);
6400 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6401 label = gen_rtx_CONST (VOIDmode, label);
6402 sum = gen_rtx_UNSPEC (Pmode,
6403 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6404 GEN_INT (TARGET_ARM ? 8 : 4)),
6406 reg = load_tls_operand (sum, reg);
6409 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6410 else if (TARGET_THUMB2)
6411 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6414 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6415 emit_move_insn (reg, gen_const_mem (SImode, reg));
6418 tp = arm_load_tp (NULL_RTX);
6420 return gen_rtx_PLUS (Pmode, tp, reg);
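/* Rough sketch (illustrative): the sequence above loads the symbol's
   thread-pointer offset from a PC-relative literal via the GOT, and the
   PLUS returned here adds the thread pointer obtained from arm_load_tp.  */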
6422 case TLS_MODEL_LOCAL_EXEC:
6423 tp = arm_load_tp (NULL_RTX);
6425 reg = gen_rtx_UNSPEC (Pmode,
6426 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6428 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6430 return gen_rtx_PLUS (Pmode, tp, reg);
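/* E.g. (illustrative): for "__thread int x;" under local-exec, the UNSPEC
   above becomes a TLS_LE32 relocation giving the constant offset of x in
   the static TLS block, so the access reduces to tp + offset with no call
   and no GOT load.  */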
6437 /* Try machine-dependent ways of modifying an illegitimate address
6438 to be legitimate. If we find one, return the new, valid address. */
6440 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6444 /* TODO: legitimize_address for Thumb2. */
6447 return thumb_legitimize_address (x, orig_x, mode);
6450 if (arm_tls_symbol_p (x))
6451 return legitimize_tls_address (x, NULL_RTX);
6453 if (GET_CODE (x) == PLUS)
6455 rtx xop0 = XEXP (x, 0);
6456 rtx xop1 = XEXP (x, 1);
6458 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6459 xop0 = force_reg (SImode, xop0);
6461 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6462 xop1 = force_reg (SImode, xop1);
6464 if (ARM_BASE_REGISTER_RTX_P (xop0)
6465 && GET_CODE (xop1) == CONST_INT)
6467 HOST_WIDE_INT n, low_n;
6471 /* VFP addressing modes actually allow greater offsets, but for
6472 now we just stick with the lowest common denominator. */
6474 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6486 low_n = ((mode) == TImode ? 0
6487 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6491 base_reg = gen_reg_rtx (SImode);
6492 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6493 emit_move_insn (base_reg, val);
6494 x = plus_constant (base_reg, low_n);
6496 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6497 x = gen_rtx_PLUS (SImode, xop0, xop1);
6500 /* XXX We don't allow MINUS any more -- see comment in
6501 arm_legitimate_address_outer_p (). */
6502 else if (GET_CODE (x) == MINUS)
6504 rtx xop0 = XEXP (x, 0);
6505 rtx xop1 = XEXP (x, 1);
6507 if (CONSTANT_P (xop0))
6508 xop0 = force_reg (SImode, xop0);
6510 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6511 xop1 = force_reg (SImode, xop1);
6513 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6514 x = gen_rtx_MINUS (SImode, xop0, xop1);
6517 /* Make sure to take full advantage of the pre-indexed addressing mode
6518 with absolute addresses which often allows for the base register to
6519 be factorized for multiple adjacent memory references, and it might
6520 even allow for the minipool to be avoided entirely. */
6521 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6524 HOST_WIDE_INT mask, base, index;
6527 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6528 use an 8-bit index. So let's use a 12-bit index for SImode only and
6529 hope that arm_gen_constant will enable ldrb to use more bits. */
6530 bits = (mode == SImode) ? 12 : 8;
6531 mask = (1 << bits) - 1;
6532 base = INTVAL (x) & ~mask;
6533 index = INTVAL (x) & mask;
6534 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6536 /* It'll most probably be more efficient to generate the base
6537 with more bits set and use a negative index instead. */
6541 base_reg = force_reg (SImode, GEN_INT (base));
6542 x = plus_constant (base_reg, index);
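/* Worked example (illustrative): for x == 0x3456 in SImode, bits == 12,
   so mask == 0xfff, base == 0x3000 and index == 0x456; bit_count (0x3000)
   is 2, which does not exceed (32 - 12)/2, so we load 0x3000 into a
   register and address it as base + 0x456.  */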
6547 /* We need to find and carefully transform any SYMBOL and LABEL
6548 references, so go back to the original address expression.
6549 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6551 if (new_x != orig_x)
6559 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6560 to be legitimate. If we find one, return the new, valid address. */
6562 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6564 if (arm_tls_symbol_p (x))
6565 return legitimize_tls_address (x, NULL_RTX);
6567 if (GET_CODE (x) == PLUS
6568 && GET_CODE (XEXP (x, 1)) == CONST_INT
6569 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6570 || INTVAL (XEXP (x, 1)) < 0))
6572 rtx xop0 = XEXP (x, 0);
6573 rtx xop1 = XEXP (x, 1);
6574 HOST_WIDE_INT offset = INTVAL (xop1);
6576 /* Try and fold the offset into a biasing of the base register and
6577 then offsetting that. Don't do this when optimizing for space
6578 since it can cause too many CSEs. */
6579 if (optimize_size && offset >= 0
6580 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6582 HOST_WIDE_INT delta;
6585 delta = offset - (256 - GET_MODE_SIZE (mode));
6586 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6587 delta = 31 * GET_MODE_SIZE (mode);
6589 delta = offset & (~31 * GET_MODE_SIZE (mode));
6591 xop0 = force_operand (plus_constant (xop0, offset - delta),
6593 x = plus_constant (xop0, delta);
6595 else if (offset < 0 && offset > -256)
6596 /* Small negative offsets are best done with a subtract before the
6597 dereference; forcing these into a register normally takes two instructions. */
6599 x = force_operand (x, NULL_RTX);
6602 /* For the remaining cases, force the constant into a register. */
6603 xop1 = force_reg (SImode, xop1);
6604 x = gen_rtx_PLUS (SImode, xop0, xop1);
6607 else if (GET_CODE (x) == PLUS
6608 && s_register_operand (XEXP (x, 1), SImode)
6609 && !s_register_operand (XEXP (x, 0), SImode))
6611 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6613 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6618 /* We need to find and carefully transform any SYMBOL and LABEL
6619 references, so go back to the original address expression.
6620 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6622 if (new_x != orig_x)
6630 arm_legitimize_reload_address (rtx *p,
6631 enum machine_mode mode,
6632 int opnum, int type,
6633 int ind_levels ATTRIBUTE_UNUSED)
6635 /* We must recognize output that we have already generated ourselves. */
6636 if (GET_CODE (*p) == PLUS
6637 && GET_CODE (XEXP (*p, 0)) == PLUS
6638 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6639 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6640 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6642 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6643 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6644 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6648 if (GET_CODE (*p) == PLUS
6649 && GET_CODE (XEXP (*p, 0)) == REG
6650 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6651 /* If the base register is equivalent to a constant, let the generic
6652 code handle it. Otherwise we will run into problems if a future
6653 reload pass decides to rematerialize the constant. */
6654 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6655 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6657 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6658 HOST_WIDE_INT low, high;
6660 /* Detect coprocessor load/stores. */
6661 bool coproc_p = ((TARGET_HARD_FLOAT
6662 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6663 && (mode == SFmode || mode == DFmode
6664 || (mode == DImode && TARGET_MAVERICK)))
6665 || (TARGET_REALLY_IWMMXT
6666 && VALID_IWMMXT_REG_MODE (mode))
6668 && (VALID_NEON_DREG_MODE (mode)
6669 || VALID_NEON_QREG_MODE (mode))));
6671 /* For some cases, bail out when the lower two bits of the offset are nonzero (an unaligned address). */
6672 if ((val & 0x3) != 0
6673 /* Coprocessor load/store indexes are 8 bits + '00' appended. */
6675 /* For DI, and DF under soft-float: */
6676 || ((mode == DImode || mode == DFmode)
6677 /* Without ldrd, we use stm/ldm, which does not
6678 fare well with unaligned bits. */
6680 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6681 || TARGET_THUMB2))))
6684 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6685 of which the (reg+high) gets turned into a reload add insn,
6686 we try to decompose the index into high/low values that can often
6687 also lead to better reload CSE.
6688 For example:
6689 ldr r0, [r2, #4100] // Offset too large
6690 ldr r1, [r2, #4104] // Offset too large
6692 is best reloaded as:
6694 add t1, r2, #4096
6695 ldr r0, [t1, #4]
6696 add t2, r2, #4096
6697 ldr r1, [t2, #8]
6698 which post-reload CSE can simplify in most cases to eliminate the
6699 second add instruction:
6701 add t1, r2, #4096
6702 ldr r0, [t1, #4]
6703 ldr r1, [t1, #8]
6704 The idea here is that we want to split out the bits of the constant
6705 as a mask, rather than by subtracting the maximum offset that the
6706 respective type of load/store used can handle.
6708 A negative low part can still be utilized even if the overall offset
6709 is positive; sometimes this may lead to an immediate that can be
6710 constructed with fewer instructions. For example:
6712 ldr r0, [r2, #0x3FFFFC]
6714 This is best reloaded as:
6715 add t1, r2, #0x400000
6716 ldr r0, [t1, #-4]
6718 The trick for spotting this for a load insn with N bits of offset
6719 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6720 negative offset that is going to make bit N and all the bits below
6721 it become zero in the remainder part.
6723 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6724 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6725 used in most cases of ARM load/store instructions. */
6727 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6728 (((VAL) & ((1 << (N)) - 1)) \
6729 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6730 : 0)
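/* Worked examples (illustrative): SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   treats bit 12 of the low 13 bits as a sign bit and yields -4, giving
   high == 0x400000 as in the comment above; for val == 4100 (bit 12 set)
   it yields -4092, giving high == 8192.  */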
6734 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6736 /* NEON quad-word load/stores are made of two double-word accesses,
6737 so the valid index range is reduced by 8. Treat as a 9-bit range if we exceed it. */
6739 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6740 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6742 else if (GET_MODE_SIZE (mode) == 8)
6745 low = (TARGET_THUMB2
6746 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6747 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6749 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6750 to access doublewords. The supported load/store offsets are
6751 -8, -4, and 4, which we try to produce here. */
6752 low = ((val & 0xf) ^ 0x8) - 0x8;
6754 else if (GET_MODE_SIZE (mode) < 8)
6756 /* NEON element load/stores do not have an offset. */
6757 if (TARGET_NEON_FP16 && mode == HFmode)
6762 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6763 Try the wider 12-bit range first, and re-try if the result is out of range. */
6765 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6767 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6771 if (mode == HImode || mode == HFmode)
6774 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6777 /* The storehi/movhi_bytes fallbacks can use only
6778 [-4094,+4094] of the full ldrb/strb index range. */
6779 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6780 if (low == 4095 || low == -4095)
6785 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6791 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6792 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6793 - (unsigned HOST_WIDE_INT) 0x80000000);
6794 /* Check for overflow or zero */
6795 if (low == 0 || high == 0 || (high + low != val))
6798 /* Reload the high part into a base reg; leave the low part in the mem. */
6800 *p = gen_rtx_PLUS (GET_MODE (*p),
6801 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6804 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6805 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6806 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6814 thumb_legitimize_reload_address (rtx *x_p,
6815 enum machine_mode mode,
6816 int opnum, int type,
6817 int ind_levels ATTRIBUTE_UNUSED)
6821 if (GET_CODE (x) == PLUS
6822 && GET_MODE_SIZE (mode) < 4
6823 && REG_P (XEXP (x, 0))
6824 && XEXP (x, 0) == stack_pointer_rtx
6825 && GET_CODE (XEXP (x, 1)) == CONST_INT
6826 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6831 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6832 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6836 /* If both registers are hi-regs, then it's better to reload the
6837 entire expression rather than each register individually. That
6838 only requires one reload register rather than two. */
6839 if (GET_CODE (x) == PLUS
6840 && REG_P (XEXP (x, 0))
6841 && REG_P (XEXP (x, 1))
6842 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6843 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6848 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6849 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6856 /* Test for various thread-local symbols. */
6858 /* Return TRUE if X is a thread-local symbol. */
6861 arm_tls_symbol_p (rtx x)
6863 if (! TARGET_HAVE_TLS)
6866 if (GET_CODE (x) != SYMBOL_REF)
6869 return SYMBOL_REF_TLS_MODEL (x) != 0;
6872 /* Helper for arm_tls_referenced_p. */
6875 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6877 if (GET_CODE (*x) == SYMBOL_REF)
6878 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6880 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6881 TLS offsets, not real symbol references. */
6882 if (GET_CODE (*x) == UNSPEC
6883 && XINT (*x, 1) == UNSPEC_TLS)
6889 /* Return TRUE if X contains any TLS symbol references. */
6892 arm_tls_referenced_p (rtx x)
6894 if (! TARGET_HAVE_TLS)
6897 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6900 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6902 On the ARM, allow any integer (invalid ones are removed later by insn
6903 patterns), nice doubles and symbol_refs which refer to the function's
6904 constant pool.
6906 When generating PIC, allow anything. */
6909 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6911 /* At present, we have no support for Neon structure constants, so forbid
6912 them here. It might be possible to handle simple cases like 0 and -1 in the future. */
6914 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6917 return flag_pic || !label_mentioned_p (x);
6921 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6923 return (GET_CODE (x) == CONST_INT
6924 || GET_CODE (x) == CONST_DOUBLE
6925 || CONSTANT_ADDRESS_P (x)
6930 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6932 return (!arm_cannot_force_const_mem (mode, x)
6934 ? arm_legitimate_constant_p_1 (mode, x)
6935 : thumb_legitimate_constant_p (mode, x)));
6938 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6941 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6945 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6947 split_const (x, &base, &offset);
6948 if (GET_CODE (base) == SYMBOL_REF
6949 && !offset_within_block_p (base, INTVAL (offset)))
6952 return arm_tls_referenced_p (x);
6955 #define REG_OR_SUBREG_REG(X) \
6956 (GET_CODE (X) == REG \
6957 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6959 #define REG_OR_SUBREG_RTX(X) \
6960 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6963 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6965 enum machine_mode mode = GET_MODE (x);
6979 return COSTS_N_INSNS (1);
6982 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6985 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6992 return COSTS_N_INSNS (2) + cycles;
6994 return COSTS_N_INSNS (1) + 16;
6997 return (COSTS_N_INSNS (1)
6998 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6999 + (GET_CODE (SET_DEST (x)) == MEM)));
7004 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7006 if (thumb_shiftable_const (INTVAL (x)))
7007 return COSTS_N_INSNS (2);
7008 return COSTS_N_INSNS (3);
7010 else if ((outer == PLUS || outer == COMPARE)
7011 && INTVAL (x) < 256 && INTVAL (x) > -256)
7013 else if ((outer == IOR || outer == XOR || outer == AND)
7014 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7015 return COSTS_N_INSNS (1);
7016 else if (outer == AND)
7019 /* This duplicates the tests in the andsi3 expander. */
7020 for (i = 9; i <= 31; i++)
7021 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7022 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7023 return COSTS_N_INSNS (2);
7025 else if (outer == ASHIFT || outer == ASHIFTRT
7026 || outer == LSHIFTRT)
7028 return COSTS_N_INSNS (2);
7034 return COSTS_N_INSNS (3);
7052 /* XXX another guess. */
7053 /* Memory costs quite a lot for the first word, but subsequent words
7054 load at the equivalent of a single insn each. */
7055 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7056 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7061 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7067 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7068 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7074 return total + COSTS_N_INSNS (1);
7076 /* Assume a two-shift sequence. Increase the cost slightly so
7077 we prefer actual shifts over an extend operation. */
7078 return total + 1 + COSTS_N_INSNS (2);
7086 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7088 enum machine_mode mode = GET_MODE (x);
7089 enum rtx_code subcode;
7091 enum rtx_code code = GET_CODE (x);
7097 /* Memory costs quite a lot for the first word, but subsequent words
7098 load at the equivalent of a single insn each. */
7099 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7106 if (TARGET_HARD_FLOAT && mode == SFmode)
7107 *total = COSTS_N_INSNS (2);
7108 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7109 *total = COSTS_N_INSNS (4);
7111 *total = COSTS_N_INSNS (20);
7115 if (GET_CODE (XEXP (x, 1)) == REG)
7116 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7117 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7118 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7124 *total += COSTS_N_INSNS (4);
7129 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7130 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7133 *total += COSTS_N_INSNS (3);
7137 *total += COSTS_N_INSNS (1);
7138 /* Increase the cost of complex shifts because they aren't any faster,
7139 and reduce dual issue opportunities. */
7140 if (arm_tune_cortex_a9
7141 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7149 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7150 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7151 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7153 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7157 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7158 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7160 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7167 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7169 if (TARGET_HARD_FLOAT
7171 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7173 *total = COSTS_N_INSNS (1);
7174 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7175 && arm_const_double_rtx (XEXP (x, 0)))
7177 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7181 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7182 && arm_const_double_rtx (XEXP (x, 1)))
7184 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7190 *total = COSTS_N_INSNS (20);
7194 *total = COSTS_N_INSNS (1);
7195 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7196 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7198 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7202 subcode = GET_CODE (XEXP (x, 1));
7203 if (subcode == ASHIFT || subcode == ASHIFTRT
7204 || subcode == LSHIFTRT
7205 || subcode == ROTATE || subcode == ROTATERT)
7207 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7208 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7212 /* A shift as a part of RSB costs no more than RSB itself. */
7213 if (GET_CODE (XEXP (x, 0)) == MULT
7214 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7216 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7217 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7222 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7224 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7225 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7229 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7230 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7232 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7233 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7234 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7235 *total += COSTS_N_INSNS (1);
7243 if (code == PLUS && arm_arch6 && mode == SImode
7244 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7245 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7247 *total = COSTS_N_INSNS (1);
7248 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7250 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7254 /* MLA: All arguments must be registers. We filter out
7255 multiplication by a power of two, so that we fall down into the code below. */
7257 if (GET_CODE (XEXP (x, 0)) == MULT
7258 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7260 /* The cost comes from the cost of the multiply. */
7264 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7266 if (TARGET_HARD_FLOAT
7268 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7270 *total = COSTS_N_INSNS (1);
7271 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7272 && arm_const_double_rtx (XEXP (x, 1)))
7274 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7281 *total = COSTS_N_INSNS (20);
7285 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7286 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7288 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7289 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7290 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7291 *total += COSTS_N_INSNS (1);
7297 case AND: case XOR: case IOR:
7299 /* Normally the frame registers will be split into reg+const during
7300 reload, so it is a bad idea to combine them with other instructions,
7301 since then they might not be moved outside of loops. As a compromise
7302 we allow integration with ops that have a constant as their second
7303 operand. */
7304 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7305 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7306 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7307 *total = COSTS_N_INSNS (1);
7311 *total += COSTS_N_INSNS (2);
7312 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7313 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7315 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7322 *total += COSTS_N_INSNS (1);
7323 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7324 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7326 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7329 subcode = GET_CODE (XEXP (x, 0));
7330 if (subcode == ASHIFT || subcode == ASHIFTRT
7331 || subcode == LSHIFTRT
7332 || subcode == ROTATE || subcode == ROTATERT)
7334 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7335 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7340 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7342 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7343 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7347 if (subcode == UMIN || subcode == UMAX
7348 || subcode == SMIN || subcode == SMAX)
7350 *total = COSTS_N_INSNS (3);
7357 /* This should have been handled by the CPU specific routines. */
7361 if (arm_arch3m && mode == SImode
7362 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7363 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7364 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7365 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7366 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7367 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7369 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7372 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7376 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7378 if (TARGET_HARD_FLOAT
7380 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7382 *total = COSTS_N_INSNS (1);
7385 *total = COSTS_N_INSNS (2);
7391 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7392 if (mode == SImode && code == NOT)
7394 subcode = GET_CODE (XEXP (x, 0));
7395 if (subcode == ASHIFT || subcode == ASHIFTRT
7396 || subcode == LSHIFTRT
7397 || subcode == ROTATE || subcode == ROTATERT
7399 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7401 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7402 /* Register shifts cost an extra cycle. */
7403 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7404 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7413 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7415 *total = COSTS_N_INSNS (4);
7419 operand = XEXP (x, 0);
7421 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7422 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7423 && GET_CODE (XEXP (operand, 0)) == REG
7424 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7425 *total += COSTS_N_INSNS (1);
7426 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7427 + rtx_cost (XEXP (x, 2), code, 2, speed));
7431 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7433 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7439 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7440 && mode == SImode && XEXP (x, 1) == const0_rtx)
7442 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7448 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7449 && mode == SImode && XEXP (x, 1) == const0_rtx)
7451 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7471 /* SCC insns. If the comparison has already been performed, they
7472 cost 2 instructions. Otherwise they need an additional comparison
7473 before them. */
7474 *total = COSTS_N_INSNS (2);
7475 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7482 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7488 *total += COSTS_N_INSNS (1);
7489 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7490 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7492 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7496 subcode = GET_CODE (XEXP (x, 0));
7497 if (subcode == ASHIFT || subcode == ASHIFTRT
7498 || subcode == LSHIFTRT
7499 || subcode == ROTATE || subcode == ROTATERT)
7501 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7502 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7507 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7509 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7510 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7520 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7521 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7522 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7523 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7527 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7529 if (TARGET_HARD_FLOAT
7531 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7533 *total = COSTS_N_INSNS (1);
7536 *total = COSTS_N_INSNS (20);
7539 *total = COSTS_N_INSNS (1);
7541 *total += COSTS_N_INSNS (3);
7547 if (GET_MODE_CLASS (mode) == MODE_INT)
7549 rtx op = XEXP (x, 0);
7550 enum machine_mode opmode = GET_MODE (op);
7553 *total += COSTS_N_INSNS (1);
7555 if (opmode != SImode)
7559 /* If !arm_arch4, we use one of the extendhisi2_mem
7560 or movhi_bytes patterns for HImode. For a QImode
7561 sign extension, we first zero-extend from memory
7562 and then perform a shift sequence. */
7563 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7564 *total += COSTS_N_INSNS (2);
7567 *total += COSTS_N_INSNS (1);
7569 /* We don't have the necessary insn, so we need to perform some other operation. */
7571 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7572 /* An and with constant 255. */
7573 *total += COSTS_N_INSNS (1);
7575 /* A shift sequence. Increase costs slightly to avoid
7576 combining two shifts into an extend operation. */
7577 *total += COSTS_N_INSNS (2) + 1;
7583 switch (GET_MODE (XEXP (x, 0)))
7590 *total = COSTS_N_INSNS (1);
7600 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7604 if (const_ok_for_arm (INTVAL (x))
7605 || const_ok_for_arm (~INTVAL (x)))
7606 *total = COSTS_N_INSNS (1);
7608 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7609 INTVAL (x), NULL_RTX,
7616 *total = COSTS_N_INSNS (3);
7620 *total = COSTS_N_INSNS (1);
7624 *total = COSTS_N_INSNS (1);
7625 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7629 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7630 && (mode == SFmode || !TARGET_VFP_SINGLE))
7631 *total = COSTS_N_INSNS (1);
7633 *total = COSTS_N_INSNS (4);
7640 /* We cost this as high as our memory costs to allow this to
7641 be hoisted from loops. */
7642 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7644 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7649 *total = COSTS_N_INSNS (4);
7654 /* Estimates the size cost of thumb1 instructions.
7655 For now most of the code is copied from thumb1_rtx_costs. We need more
7656 finer-grained tuning when we have more related test cases. */
7658 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7660 enum machine_mode mode = GET_MODE (x);
7673 return COSTS_N_INSNS (1);
7676 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7678 /* The Thumb-1 mul instruction can't operate on a constant; we must
7679 load it into a register first. */
7680 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7681 return COSTS_N_INSNS (1) + const_size;
7683 return COSTS_N_INSNS (1);
7686 return (COSTS_N_INSNS (1)
7687 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7688 + (GET_CODE (SET_DEST (x)) == MEM)));
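/* E.g. (illustrative reading of the formula): a reg <- mem SET costs
   COSTS_N_INSNS (1) + 4, and a mem <- mem move would cost
   COSTS_N_INSNS (1) + 8.  */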
7693 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7694 return COSTS_N_INSNS (1);
7695 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7696 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7697 return COSTS_N_INSNS (2);
7698 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7699 if (thumb_shiftable_const (INTVAL (x)))
7700 return COSTS_N_INSNS (2);
7701 return COSTS_N_INSNS (3);
7703 else if ((outer == PLUS || outer == COMPARE)
7704 && INTVAL (x) < 256 && INTVAL (x) > -256)
7706 else if ((outer == IOR || outer == XOR || outer == AND)
7707 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7708 return COSTS_N_INSNS (1);
7709 else if (outer == AND)
7712 /* This duplicates the tests in the andsi3 expander. */
7713 for (i = 9; i <= 31; i++)
7714 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7715 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7716 return COSTS_N_INSNS (2);
7718 else if (outer == ASHIFT || outer == ASHIFTRT
7719 || outer == LSHIFTRT)
7721 return COSTS_N_INSNS (2);
7727 return COSTS_N_INSNS (3);
7745 /* XXX another guess. */
7746 /* Memory costs quite a lot for the first word, but subsequent words
7747 load at the equivalent of a single insn each. */
7748 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7749 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7754 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7759 /* XXX still guessing. */
7760 switch (GET_MODE (XEXP (x, 0)))
7763 return (1 + (mode == DImode ? 4 : 0)
7764 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7767 return (4 + (mode == DImode ? 4 : 0)
7768 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7771 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7782 /* RTX costs when optimizing for size. */
7784 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7787 enum machine_mode mode = GET_MODE (x);
7790 *total = thumb1_size_rtx_costs (x, code, outer_code);
7794 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7798 /* A memory access costs 1 insn if the mode is small, or the address is
7799 a single register, otherwise it costs one insn per word. */
7800 if (REG_P (XEXP (x, 0)))
7801 *total = COSTS_N_INSNS (1);
7803 && GET_CODE (XEXP (x, 0)) == PLUS
7804 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7805 /* This will be split into two instructions.
7806 See arm.md:calculate_pic_address. */
7807 *total = COSTS_N_INSNS (2);
7809 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7816 /* Needs a libcall, so it costs about this. */
7817 *total = COSTS_N_INSNS (2);
7821 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7823 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7831 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7833 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7836 else if (mode == SImode)
7838 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7839 /* Slightly disparage register shifts, but not by much. */
7840 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7841 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7845 /* Needs a libcall. */
7846 *total = COSTS_N_INSNS (2);
7850 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7851 && (mode == SFmode || !TARGET_VFP_SINGLE))
7853 *total = COSTS_N_INSNS (1);
7859 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7860 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7862 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7863 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7864 || subcode1 == ROTATE || subcode1 == ROTATERT
7865 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7866 || subcode1 == ASHIFTRT)
7868 /* It's just the cost of the two operands. */
7873 *total = COSTS_N_INSNS (1);
7877 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7881 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7882 && (mode == SFmode || !TARGET_VFP_SINGLE))
7884 *total = COSTS_N_INSNS (1);
7888 /* A shift as a part of ADD costs nothing. */
7889 if (GET_CODE (XEXP (x, 0)) == MULT
7890 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7892 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7893 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7894 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7899 case AND: case XOR: case IOR:
7902 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7904 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7905 || subcode == LSHIFTRT || subcode == ASHIFTRT
7906 || (code == AND && subcode == NOT))
7908 /* It's just the cost of the two operands. */
7914 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7918 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7922 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7923 && (mode == SFmode || !TARGET_VFP_SINGLE))
7925 *total = COSTS_N_INSNS (1);
7931 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7940 if (cc_register (XEXP (x, 0), VOIDmode))
7943 *total = COSTS_N_INSNS (1);
7947 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7948 && (mode == SFmode || !TARGET_VFP_SINGLE))
7949 *total = COSTS_N_INSNS (1);
7951 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7956 return arm_rtx_costs_1 (x, outer_code, total, 0);
7959 if (const_ok_for_arm (INTVAL (x)))
7960 /* A multiplication by a constant requires another instruction
7961 to load the constant to a register. */
7962 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7964 else if (const_ok_for_arm (~INTVAL (x)))
7965 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7966 else if (const_ok_for_arm (-INTVAL (x)))
7968 if (outer_code == COMPARE || outer_code == PLUS
7969 || outer_code == MINUS)
7972 *total = COSTS_N_INSNS (1);
7975 *total = COSTS_N_INSNS (2);
7981 *total = COSTS_N_INSNS (2);
7985 *total = COSTS_N_INSNS (4);
7990 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7991 cost of these slightly. */
7992 *total = COSTS_N_INSNS (1) + 1;
7999 if (mode != VOIDmode)
8000 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8002 *total = COSTS_N_INSNS (4); /* Who knows? */
8007 /* RTX costs. Dispatch to the size costs when optimizing for size, otherwise to the per-core tuned costs. */
8009 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8010 int *total, bool speed)
8013 return arm_size_rtx_costs (x, (enum rtx_code) code,
8014 (enum rtx_code) outer_code, total);
8016 return current_tune->rtx_costs (x, (enum rtx_code) code,
8017 (enum rtx_code) outer_code,
8021 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8022 supported on any "slowmul" cores, so it can be ignored. */
8025 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8026 int *total, bool speed)
8028 enum machine_mode mode = GET_MODE (x);
8032 *total = thumb1_rtx_costs (x, code, outer_code);
8039 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8042 *total = COSTS_N_INSNS (20);
8046 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8048 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8049 & (unsigned HOST_WIDE_INT) 0xffffffff);
8050 int cost, const_ok = const_ok_for_arm (i);
8051 int j, booth_unit_size;
8053 /* Tune as appropriate. */
8054 cost = const_ok ? 4 : 8;
8055 booth_unit_size = 2;
8056 for (j = 0; i && j < 32; j += booth_unit_size)
8058 i >>= booth_unit_size;
8062 *total = COSTS_N_INSNS (cost);
8063 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8067 *total = COSTS_N_INSNS (20);
8071 return arm_rtx_costs_1 (x, outer_code, total, speed);
8076 /* RTX cost for cores with a fast multiply unit (M variants). */
8079 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8080 int *total, bool speed)
8082 enum machine_mode mode = GET_MODE (x);
8086 *total = thumb1_rtx_costs (x, code, outer_code);
8090 /* ??? should thumb2 use different costs? */
8094 /* There is no point basing this on the tuning, since it is always the
8095 fast variant if it exists at all. */
8097 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8098 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8099 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8101 *total = COSTS_N_INSNS (2);
8108 *total = COSTS_N_INSNS (5);
8112 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8114 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8115 & (unsigned HOST_WIDE_INT) 0xffffffff);
8116 int cost, const_ok = const_ok_for_arm (i);
8117 int j, booth_unit_size;
8119 /* Tune as appropriate. */
8120 cost = const_ok ? 4 : 8;
8121 booth_unit_size = 8;
8122 for (j = 0; i && j < 32; j += booth_unit_size)
8124 i >>= booth_unit_size;
8128 *total = COSTS_N_INSNS (cost);
8134 *total = COSTS_N_INSNS (4);
8138 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8140 if (TARGET_HARD_FLOAT
8142 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8144 *total = COSTS_N_INSNS (1);
8149 /* Requires a lib call */
8150 *total = COSTS_N_INSNS (20);
8154 return arm_rtx_costs_1 (x, outer_code, total, speed);
8159 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8160 so it can be ignored. */
8163 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8164 int *total, bool speed)
8166 enum machine_mode mode = GET_MODE (x);
8170 *total = thumb1_rtx_costs (x, code, outer_code);
8177 if (GET_CODE (XEXP (x, 0)) != MULT)
8178 return arm_rtx_costs_1 (x, outer_code, total, speed);
8180 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8181 will stall until the multiplication is complete. */
8182 *total = COSTS_N_INSNS (3);
8186 /* There is no point basing this on the tuning, since it is always the
8187 fast variant if it exists at all. */
8189 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8190 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8191 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8193 *total = COSTS_N_INSNS (2);
8200 *total = COSTS_N_INSNS (5);
8204 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8206 /* If operand 1 is a constant we can more accurately
8207 calculate the cost of the multiply. The multiplier can
8208 retire 15 bits on the first cycle and a further 12 on the
8209 second. We do, of course, have to load the constant into
8210 a register first. */
8211 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8212 /* There's a general overhead of one cycle. */
8214 unsigned HOST_WIDE_INT masked_const;
8219 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8221 masked_const = i & 0xffff8000;
8222 if (masked_const != 0)
8225 masked_const = i & 0xf8000000;
8226 if (masked_const != 0)
8229 *total = COSTS_N_INSNS (cost);
8235 *total = COSTS_N_INSNS (3);
8239 /* Requires a lib call */
8240 *total = COSTS_N_INSNS (20);
8244 return arm_rtx_costs_1 (x, outer_code, total, speed);
8249 /* RTX costs for 9e (and later) cores. */
8252 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8253 int *total, bool speed)
8255 enum machine_mode mode = GET_MODE (x);
8262 *total = COSTS_N_INSNS (3);
8266 *total = thumb1_rtx_costs (x, code, outer_code);
8274 /* There is no point basing this on the tuning, since it is always the
8275 fast variant if it exists at all. */
8277 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8278 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8279 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8281 *total = COSTS_N_INSNS (2);
8288 *total = COSTS_N_INSNS (5);
8294 *total = COSTS_N_INSNS (2);
8298 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8300 if (TARGET_HARD_FLOAT
8302 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8304 *total = COSTS_N_INSNS (1);
8309 *total = COSTS_N_INSNS (20);
8313 return arm_rtx_costs_1 (x, outer_code, total, speed);
8316 /* All address computations that can be done are free, but rtx cost returns
8317 the same for practically all of them. So we weight the different types
8318 of address here in the order (most preferred first):
8319 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8321 arm_arm_address_cost (rtx x)
8323 enum rtx_code c = GET_CODE (x);
8325 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8327 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8332 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8335 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8345 arm_thumb_address_cost (rtx x)
8347 enum rtx_code c = GET_CODE (x);
8352 && GET_CODE (XEXP (x, 0)) == REG
8353 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8360 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8362 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8365 /* Adjust cost hook for XScale. */
8367 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8369 /* Some true dependencies can have a higher cost depending
8370 on precisely how certain input operands are used. */
8371 if (REG_NOTE_KIND(link) == 0
8372 && recog_memoized (insn) >= 0
8373 && recog_memoized (dep) >= 0)
8375 int shift_opnum = get_attr_shift (insn);
8376 enum attr_type attr_type = get_attr_type (dep);
8378 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8379 operand for INSN. If we have a shifted input operand and the
8380 instruction we depend on is another ALU instruction, then we may
8381 have to account for an additional stall. */
8382 if (shift_opnum != 0
8383 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8385 rtx shifted_operand;
8388 /* Get the shifted operand. */
8389 extract_insn (insn);
8390 shifted_operand = recog_data.operand[shift_opnum];
8392 /* Iterate over all the operands in DEP. If we write an operand
8393 that overlaps with SHIFTED_OPERAND, then we have to increase the
8394 cost of this dependency. */
8396 preprocess_constraints ();
8397 for (opno = 0; opno < recog_data.n_operands; opno++)
8399 /* We can ignore strict inputs. */
8400 if (recog_data.operand_type[opno] == OP_IN)
8403 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8415 /* Adjust cost hook for Cortex A9. */
8417 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8419 switch (REG_NOTE_KIND (link))
8426 case REG_DEP_OUTPUT:
8427 if (recog_memoized (insn) >= 0
8428 && recog_memoized (dep) >= 0)
8430 if (GET_CODE (PATTERN (insn)) == SET)
8433 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8435 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8437 enum attr_type attr_type_insn = get_attr_type (insn);
8438 enum attr_type attr_type_dep = get_attr_type (dep);
8440 /* By default all dependencies of the form
8441 s0 = s0 <op> s1
8442 s0 = s0 <op> s2
8443 have an extra latency of 1 cycle because
8444 of the input and output dependency in this
8445 case. However this gets modeled as a true
8446 dependency and hence all these checks. */
8447 if (REG_P (SET_DEST (PATTERN (insn)))
8448 && REG_P (SET_DEST (PATTERN (dep)))
8449 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8450 SET_DEST (PATTERN (dep))))
8452 /* FMACS is a special case where the dependent
8453 instruction can be issued 3 cycles before
8454 the normal latency in case of an output
8455 dependency. */
8456 if ((attr_type_insn == TYPE_FMACS
8457 || attr_type_insn == TYPE_FMACD)
8458 && (attr_type_dep == TYPE_FMACS
8459 || attr_type_dep == TYPE_FMACD))
8461 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8462 *cost = insn_default_latency (dep) - 3;
8464 *cost = insn_default_latency (dep);
8469 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8470 *cost = insn_default_latency (dep) + 1;
8472 *cost = insn_default_latency (dep);
8488 /* Adjust cost hook for FA726TE. */
8490 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8492 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting instruction
8493 followed by a predicated one) has a penalty of 3. */
8494 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8495 && recog_memoized (insn) >= 0
8496 && recog_memoized (dep) >= 0
8497 && get_attr_conds (dep) == CONDS_SET)
8499 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8500 if (get_attr_conds (insn) == CONDS_USE
8501 && get_attr_type (insn) != TYPE_BRANCH)
8507 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8508 || get_attr_conds (insn) == CONDS_USE)
8518 /* Implement TARGET_REGISTER_MOVE_COST.
8520 Moves between FPA_REGS and GENERAL_REGS are two memory insns.
8521 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8522 it is typically more expensive than a single memory access. We set
8523 the cost to less than two memory accesses so that floating
8524 point to integer conversion does not go through memory. */
8527 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8528 reg_class_t from, reg_class_t to)
8532 if ((from == FPA_REGS && to != FPA_REGS)
8533 || (from != FPA_REGS && to == FPA_REGS))
8535 else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8536 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8538 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8539 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8541 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8543 else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
8544 || (from != CIRRUS_REGS && to == CIRRUS_REGS))
8551 if (from == HI_REGS || to == HI_REGS)
8558 /* Implement TARGET_MEMORY_MOVE_COST. */
8561 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8562 bool in ATTRIBUTE_UNUSED)
8568 if (GET_MODE_SIZE (mode) < 4)
8571 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
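/* E.g. (illustrative): under this formula an SImode move costs 8 for
   LO_REGS and 16 for other classes; a DImode move costs 16 and 32
   respectively.  */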
8575 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8576 It corrects the value of COST based on the relationship between
8577 INSN and DEP through the dependence LINK. It returns the new
8578 value. There is a per-core adjust_cost hook to adjust scheduler costs
8579 and the per-core hook can choose to completely override the generic
8580 adjust_cost function. Only put bits of code into arm_adjust_cost that
8581 are common across all cores. */
8583 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8587 /* When generating Thumb-1 code, we want to place flag-setting operations
8588 close to a conditional branch which depends on them, so that we can
8589 omit the comparison. */
8591 && REG_NOTE_KIND (link) == 0
8592 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8593 && recog_memoized (dep) >= 0
8594 && get_attr_conds (dep) == CONDS_SET)
8597 if (current_tune->sched_adjust_cost != NULL)
8599 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8603 /* XXX This is not strictly true for the FPA. */
8604 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8605 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8608 /* Call insns don't incur a stall, even if they follow a load. */
8609 if (REG_NOTE_KIND (link) == 0
8610 && GET_CODE (insn) == CALL_INSN)
8613 if ((i_pat = single_set (insn)) != NULL
8614 && GET_CODE (SET_SRC (i_pat)) == MEM
8615 && (d_pat = single_set (dep)) != NULL
8616 && GET_CODE (SET_DEST (d_pat)) == MEM)
8618 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8619 /* This is a load after a store; there is no conflict if the load reads
8620 from a cached area. Assume that loads from the stack, and from the
8621 constant pool are cached, and that others will miss. This is a
8622 crude approximation. */
8624 if ((GET_CODE (src_mem) == SYMBOL_REF
8625 && CONSTANT_POOL_ADDRESS_P (src_mem))
8626 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8627 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8628 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8636 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8639 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8641 return (optimize > 0) ? 2 : 0;
8645 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8647 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8650 static int fp_consts_inited = 0;
8652 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8653 static const char * const strings_fp[8] =
8656 "4", "5", "0.5", "10"
8659 static REAL_VALUE_TYPE values_fp[8];
8662 init_fp_table (void)
8668 fp_consts_inited = 1;
8670 fp_consts_inited = 8;
8672 for (i = 0; i < fp_consts_inited; i++)
8674 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8679 /* Return TRUE if rtx X is a valid immediate FP constant. */
8681 arm_const_double_rtx (rtx x)
8686 if (!fp_consts_inited)
8689 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8690 if (REAL_VALUE_MINUS_ZERO (r))
8693 for (i = 0; i < fp_consts_inited; i++)
8694 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8700 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8702 neg_const_double_rtx_ok_for_fpa (rtx x)
8707 if (!fp_consts_inited)
8710 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8711 r = real_value_negate (&r);
8712 if (REAL_VALUE_MINUS_ZERO (r))
8715 for (i = 0; i < 8; i++)
8716 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8723 /* VFPv3 has a fairly wide range of representable immediates, formed from
8724 "quarter-precision" floating-point values. These can be evaluated using this
8725 formula (with ^ for exponentiation):
8727 -1^s * n * 2^-r
8729 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8730 16 <= n <= 31 and 0 <= r <= 7.
8732 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8734 - A (most-significant) is the sign bit.
8735 - BCD are the exponent (encoded as r XOR 3).
8736 - EFGH are the mantissa (encoded as n - 16).
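   Worked examples (illustrative): 1.0 = 16 * 2^-4, so s = 0, n = 16 and
   r = 4, giving the index (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70;
   31.0 = 31 * 2^0 encodes as 0x3f.  The representable magnitudes thus run
   from 0.125 (16 * 2^-7) up to 31.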
8739 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8740 fconst[sd] instruction, or -1 if X isn't suitable. */
8742 vfp3_const_double_index (rtx x)
8744 REAL_VALUE_TYPE r, m;
8746 unsigned HOST_WIDE_INT mantissa, mant_hi;
8747 unsigned HOST_WIDE_INT mask;
8748 HOST_WIDE_INT m1, m2;
8749 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8751 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8754 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8756 /* We can't represent these things, so detect them first. */
8757 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8760 /* Extract sign, exponent and mantissa. */
8761 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8762 r = real_value_abs (&r);
8763 exponent = REAL_EXP (&r);
8764 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8765 highest (sign) bit, with a fixed binary point at bit point_pos.
8766 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8767 bits for the mantissa, this may fail (low bits would be lost). */
8768 real_ldexp (&m, &r, point_pos - exponent);
8769 REAL_VALUE_TO_INT (&m1, &m2, m);
8773 /* If there are bits set in the low part of the mantissa, we can't
8774 represent this value. */
8778 /* Now make it so that mantissa contains the most-significant bits, and move
8779 the point_pos to indicate that the least-significant bits have been discarded. */
8781 point_pos -= HOST_BITS_PER_WIDE_INT;
8784 /* We can permit four significant bits of mantissa only, plus a high bit
8785 which is always 1. */
8786 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8787 if ((mantissa & mask) != 0)
8790 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8791 mantissa >>= point_pos - 5;
8793 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8794 floating-point immediate zero with Neon using an integer-zero load, but
8795 that case is handled elsewhere.) */
8799 gcc_assert (mantissa >= 16 && mantissa <= 31);
8801 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8802 normalized significands are in the range [1, 2). (Our mantissa is shifted
8803 left 4 places at this point relative to normalized IEEE754 values). GCC
8804 internally uses [0.5, 1) (see real.c), so the exponent returned from
8805 REAL_EXP must be altered. */
8806 exponent = 5 - exponent;
8808 if (exponent < 0 || exponent > 7)
8811 /* Sign, mantissa and exponent are now in the correct form to plug into the
8812 formula described in the comment above. */
8813 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8816 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8818 vfp3_const_double_rtx (rtx x)
8823 return vfp3_const_double_index (x) != -1;
8826 /* Recognize immediates which can be used in various Neon instructions. Legal
8827 immediates are described by the following table (for VMVN variants, the
8828 bitwise inverse of the constant shown is recognized. In either case, VMOV
8829 is output and the correct instruction to use for a given constant is chosen
8830 by the assembler). The constant shown is replicated across all elements of
8831 the destination vector.
8833 insn elems variant constant (binary)
8834 ---- ----- ------- -----------------
8835 vmov i32 0 00000000 00000000 00000000 abcdefgh
8836 vmov i32 1 00000000 00000000 abcdefgh 00000000
8837 vmov i32 2 00000000 abcdefgh 00000000 00000000
8838 vmov i32 3 abcdefgh 00000000 00000000 00000000
8839 vmov i16 4 00000000 abcdefgh
8840 vmov i16 5 abcdefgh 00000000
8841 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8842 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8843 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8844 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8845 vmvn i16 10 00000000 abcdefgh
8846 vmvn i16 11 abcdefgh 00000000
8847 vmov i32 12 00000000 00000000 abcdefgh 11111111
8848 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8849 vmov i32 14 00000000 abcdefgh 11111111 11111111
8850 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8852 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8853 eeeeeeee ffffffff gggggggg hhhhhhhh
8854 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8856 For case 18, B = !b. Representable values are exactly those accepted by
8857 vfp3_const_double_index, but are output as floating-point numbers rather
     than indices.
8860 Variants 0-5 (inclusive) may also be used as immediates for the second
8861 operand of VORR/VBIC instructions.
8863 The INVERSE argument causes the bitwise inverse of the given operand to be
8864 recognized instead (used for recognizing legal immediates for the VAND/VORN
8865 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8866 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8867 output, rather than the real insns vbic/vorr).
8869 INVERSE makes no difference to the recognition of float vectors.
8871 The return value is the variant of immediate as shown in the above table, or
8872 -1 if the given value doesn't match any of the listed patterns.
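/* For instance (illustrative): a V4SImode vector whose elements are all
   0x000000ab matches variant 0 above, while elements of 0xffffff54 (the
   bitwise inverse of 0x000000ab) match the VMVN variant 6.  */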
8875 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8876 rtx *modconst, int *elementwidth)
8878 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8880 for (i = 0; i < idx; i += (STRIDE)) \
8885 immtype = (CLASS); \
8886 elsize = (ELSIZE); \
8890 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8891 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8892 unsigned char bytes[16];
8893 int immtype = -1, matches;
8894 unsigned int invmask = inverse ? 0xff : 0;
8896 /* Vectors of float constants. */
8897 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8899 rtx el0 = CONST_VECTOR_ELT (op, 0);
8902 if (!vfp3_const_double_rtx (el0))
8905 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8907 for (i = 1; i < n_elts; i++)
8909 rtx elt = CONST_VECTOR_ELT (op, i);
8912 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8914 if (!REAL_VALUES_EQUAL (r0, re))
8919 *modconst = CONST_VECTOR_ELT (op, 0);
8927 /* Splat vector constant out into a byte vector. */
8928 for (i = 0; i < n_elts; i++)
8930 rtx el = CONST_VECTOR_ELT (op, i);
8931 unsigned HOST_WIDE_INT elpart;
8932 unsigned int part, parts;
8934 if (GET_CODE (el) == CONST_INT)
8936 elpart = INTVAL (el);
8939 else if (GET_CODE (el) == CONST_DOUBLE)
8941 elpart = CONST_DOUBLE_LOW (el);
8947 for (part = 0; part < parts; part++)
8950 for (byte = 0; byte < innersize; byte++)
8952 bytes[idx++] = (elpart & 0xff) ^ invmask;
8953 elpart >>= BITS_PER_UNIT;
8955 if (GET_CODE (el) == CONST_DOUBLE)
8956 elpart = CONST_DOUBLE_HIGH (el);
8961 gcc_assert (idx == GET_MODE_SIZE (mode));
8965 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8966 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8968 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8969 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8971 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8972 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8974 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8975 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8977 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8979 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8981 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8982 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8984 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8985 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8987 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8988 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8990 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8991 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8993 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8995 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8997 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8998 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9000 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9001 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9003 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9004 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9006 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9007 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9009 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9011 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9012 && bytes[i] == bytes[(i + 8) % idx]);
9020 *elementwidth = elsize;
9024 unsigned HOST_WIDE_INT imm = 0;
9026 /* Un-invert bytes of recognized vector, if necessary. */
9028 for (i = 0; i < idx; i++)
9029 bytes[i] ^= invmask;
9033 /* FIXME: Broken on 32-bit H_W_I hosts. */
9034 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9036 for (i = 0; i < 8; i++)
9037 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9038 << (i * BITS_PER_UNIT);
9040 *modconst = GEN_INT (imm);
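/* E.g. (illustrative) a vector whose 64-bit elements are all
   0x00ff00ff00ff00ff has every byte equal to 0x00 or 0xff, so it passes
   the variant-17 test, and the mask packed above reproduces exactly that
   value.  */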
9044 unsigned HOST_WIDE_INT imm = 0;
9046 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9047 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9049 *modconst = GEN_INT (imm);
9057 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9058 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9059 float elements), and a modified constant (whatever should be output for a
9060 VMOV) in *MODCONST. */
9063 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9064 rtx *modconst, int *elementwidth)
9068 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9074 *modconst = tmpconst;
9077 *elementwidth = tmpwidth;
9082 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9083 the immediate is valid, write a constant suitable for using as an operand
9084 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9085 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9088 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9089 rtx *modconst, int *elementwidth)
9093 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9095 if (retval < 0 || retval > 5)
9099 *modconst = tmpconst;
9102 *elementwidth = tmpwidth;
9107 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9108 the immediate is valid, write a constant suitable for using as an operand
9109 to VSHR/VSHL to *MODCONST and the corresponding element width to
9110 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift,
9111 because left and right shifts have different immediate ranges.
9114 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9115 rtx *modconst, int *elementwidth,
9118 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9119 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9120 unsigned HOST_WIDE_INT last_elt = 0;
9121 unsigned HOST_WIDE_INT maxshift;
9123 /* Split vector constant out into a byte vector. */
9124 for (i = 0; i < n_elts; i++)
9126 rtx el = CONST_VECTOR_ELT (op, i);
9127 unsigned HOST_WIDE_INT elpart;
9129 if (GET_CODE (el) == CONST_INT)
9130 elpart = INTVAL (el);
9131 else if (GET_CODE (el) == CONST_DOUBLE)
9136 if (i != 0 && elpart != last_elt)
9142 /* Shift less than element size. */
9143 maxshift = innersize * 8;
9147 /* Left shift immediate value can be from 0 to <size>-1. */
9148 if (last_elt >= maxshift)
9153 /* Right shift immediate value can be from 1 to <size>. */
9154 if (last_elt == 0 || last_elt > maxshift)
9159 *elementwidth = innersize * 8;
9162 *modconst = CONST_VECTOR_ELT (op, 0);
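/* E.g. (illustrative) for V4SImode the element size is 32, so a VSHL
   immediate must lie in [0, 31] while a VSHR immediate must lie in
   [1, 32]; anything outside those ranges is rejected above.  */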
9167 /* Return a string suitable for output of Neon immediate logic operation
9171 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9172 int inverse, int quad)
9174 int width, is_valid;
9175 static char templ[40];
9177 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9179 gcc_assert (is_valid != 0);
9182 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9184 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9189 /* Return a string suitable for output of Neon immediate shift operation
9190 (VSHR or VSHL) MNEM. */
9193 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9194 enum machine_mode mode, int quad,
9197 int width, is_valid;
9198 static char templ[40];
9200 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9201 gcc_assert (is_valid != 0);
9204 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9206 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9211 /* Output a sequence of pairwise operations to implement a reduction.
9212 NOTE: We do "too much work" here, because pairwise operations work on two
9213 registers-worth of operands in one go. Unfortunately we can't exploit those
9214 extra calculations to do the full operation in fewer steps, I don't think.
9215 Although all vector elements of the result but the first are ignored, we
9216 actually calculate the same result in each of the elements. An alternative
9217 such as initially loading a vector with zero to use as each of the second
9218 operands would use up an additional register and take an extra instruction,
9219 for no particular gain. */
9222 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9223 rtx (*reduc) (rtx, rtx, rtx))
9225 enum machine_mode inner = GET_MODE_INNER (mode);
9226 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9229 for (i = parts / 2; i >= 1; i /= 2)
9231 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9232 emit_insn (reduc (dest, tmpsum, tmpsum));
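/* E.g. (illustrative) reducing four elements takes two pairwise steps:
   {a, b, c, d} -> {a+b, c+d, ...} -> {a+b+c+d, ...}.  As noted above,
   every element of the final vector ends up holding the full result,
   but only element 0 is actually consumed.  */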
9237 /* If VALS is a vector constant that can be loaded into a register
9238 using VDUP, generate instructions to do so and return an RTX to
9239 assign to the register. Otherwise return NULL_RTX. */
9242 neon_vdup_constant (rtx vals)
9244 enum machine_mode mode = GET_MODE (vals);
9245 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9246 int n_elts = GET_MODE_NUNITS (mode);
9247 bool all_same = true;
9251 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9254 for (i = 0; i < n_elts; ++i)
9256 x = XVECEXP (vals, 0, i);
9257 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9262 /* The elements are not all the same. We could handle repeating
9263 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9264 {0, C, 0, C, 0, C, 0, C} which can be loaded using
     vdup.i16).  */
9268 /* We can load this constant by using VDUP and a constant in a
9269 single ARM register. This will be cheaper than a vector
     load.  */
9272 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9273 return gen_rtx_VEC_DUPLICATE (mode, x);
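/* E.g. (illustrative) for an int32x2_t of {7, 7} the scalar 7 is copied
   into a core register, and the (vec_duplicate:V2SI (reg)) returned here
   is matched by the vdup patterns, avoiding a constant-pool load.  */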
9276 /* Generate code to load VALS, which is a PARALLEL containing only
9277 constants (for vec_init) or CONST_VECTOR, efficiently into a
9278 register. Returns an RTX to copy into the register, or NULL_RTX
9279 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9282 neon_make_constant (rtx vals)
9284 enum machine_mode mode = GET_MODE (vals);
9286 rtx const_vec = NULL_RTX;
9287 int n_elts = GET_MODE_NUNITS (mode);
9291 if (GET_CODE (vals) == CONST_VECTOR)
9293 else if (GET_CODE (vals) == PARALLEL)
9295 /* A CONST_VECTOR must contain only CONST_INTs and
9296 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9297 Only store valid constants in a CONST_VECTOR. */
9298 for (i = 0; i < n_elts; ++i)
9300 rtx x = XVECEXP (vals, 0, i);
9301 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9304 if (n_const == n_elts)
9305 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9310 if (const_vec != NULL
9311 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9312 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9314 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9315 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9316 pipeline cycle; creating the constant takes one or two ARM
     pipeline cycles.  */
9319 else if (const_vec != NULL_RTX)
9320 /* Load from constant pool. On Cortex-A8 this takes two cycles
9321 (for either double or quad vectors). We can not take advantage
9322 of single-cycle VLD1 because we need a PC-relative addressing
     mode.  */
9326 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9327 We can not construct an initializer. */
9331 /* Initialize vector TARGET to VALS. */
9334 neon_expand_vector_init (rtx target, rtx vals)
9336 enum machine_mode mode = GET_MODE (target);
9337 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9338 int n_elts = GET_MODE_NUNITS (mode);
9339 int n_var = 0, one_var = -1;
9340 bool all_same = true;
9344 for (i = 0; i < n_elts; ++i)
9346 x = XVECEXP (vals, 0, i);
9347 if (!CONSTANT_P (x))
9348 ++n_var, one_var = i;
9350 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9356 rtx constant = neon_make_constant (vals);
9357 if (constant != NULL_RTX)
9359 emit_move_insn (target, constant);
9364 /* Splat a single non-constant element if we can. */
9365 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9367 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9368 emit_insn (gen_rtx_SET (VOIDmode, target,
9369 gen_rtx_VEC_DUPLICATE (mode, x)));
9373 /* One field is non-constant. Load constant then overwrite varying
9374 field. This is more efficient than using the stack. */
9377 rtx copy = copy_rtx (vals);
9378 rtx index = GEN_INT (one_var);
9380 /* Load constant part of vector, substitute neighboring value for
     varying element.  */
9382 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9383 neon_expand_vector_init (target, copy);
9385 /* Insert variable. */
9386 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9390 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9393 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9396 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9399 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9402 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9405 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9408 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9411 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9414 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9422 /* Construct the vector in memory one field at a time
9423 and load the whole vector. */
9424 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9425 for (i = 0; i < n_elts; i++)
9426 emit_move_insn (adjust_address_nv (mem, inner_mode,
9427 i * GET_MODE_SIZE (inner_mode)),
9428 XVECEXP (vals, 0, i));
9429 emit_move_insn (target, mem);
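/* E.g. (illustrative) initializing {x, x, x, x} from a variable x takes
   the vec_duplicate path above; {1, 2, 3, x} first loads the constant
   vector {1, 2, 3, 1} and then overwrites element 3 with a single
   vset_lane insn, rather than spilling everything through the stack.  */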
9432 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9433 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9434 reported source locations are bogus. */
9437 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9442 gcc_assert (GET_CODE (operand) == CONST_INT);
9444 lane = INTVAL (operand);
9446 if (lane < low || lane >= high)
9450 /* Bounds-check lanes. */
9453 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9455 bounds_check (operand, low, high, "lane out of range");
9458 /* Bounds-check constants. */
9461 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9463 bounds_check (operand, low, high, "constant out of range");
9467 neon_element_bits (enum machine_mode mode)
9470 return GET_MODE_BITSIZE (mode);
9472 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9476 /* Predicates for `match_operand' and `match_operator'. */
9478 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9480 cirrus_memory_offset (rtx op)
9482 /* Reject eliminable registers. */
9483 if (! (reload_in_progress || reload_completed)
9484 && ( reg_mentioned_p (frame_pointer_rtx, op)
9485 || reg_mentioned_p (arg_pointer_rtx, op)
9486 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9487 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9488 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9489 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9492 if (GET_CODE (op) == MEM)
9498 /* Match: (mem (reg)). */
9499 if (GET_CODE (ind) == REG)
9505 if (GET_CODE (ind) == PLUS
9506 && GET_CODE (XEXP (ind, 0)) == REG
9507 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9508 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9515 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9516 WB is true if full writeback address modes are allowed and is false
9517 if limited writeback address modes (POST_INC and PRE_DEC) are
     allowed.  */
9521 arm_coproc_mem_operand (rtx op, bool wb)
9525 /* Reject eliminable registers. */
9526 if (! (reload_in_progress || reload_completed)
9527 && ( reg_mentioned_p (frame_pointer_rtx, op)
9528 || reg_mentioned_p (arg_pointer_rtx, op)
9529 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9530 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9531 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9532 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9535 /* Constants are converted into offsets from labels. */
9536 if (GET_CODE (op) != MEM)
9541 if (reload_completed
9542 && (GET_CODE (ind) == LABEL_REF
9543 || (GET_CODE (ind) == CONST
9544 && GET_CODE (XEXP (ind, 0)) == PLUS
9545 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9546 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9549 /* Match: (mem (reg)). */
9550 if (GET_CODE (ind) == REG)
9551 return arm_address_register_rtx_p (ind, 0);
9553 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9554 acceptable in any case (subject to verification by
9555 arm_address_register_rtx_p). We need WB to be true to accept
9556 PRE_INC and POST_DEC. */
9557 if (GET_CODE (ind) == POST_INC
9558 || GET_CODE (ind) == PRE_DEC
9560 && (GET_CODE (ind) == PRE_INC
9561 || GET_CODE (ind) == POST_DEC)))
9562 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9565 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9566 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9567 && GET_CODE (XEXP (ind, 1)) == PLUS
9568 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9569 ind = XEXP (ind, 1);
9574 if (GET_CODE (ind) == PLUS
9575 && GET_CODE (XEXP (ind, 0)) == REG
9576 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9577 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9578 && INTVAL (XEXP (ind, 1)) > -1024
9579 && INTVAL (XEXP (ind, 1)) < 1024
9580 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
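/* So, illustratively, (mem (plus (reg) (const_int 516))) is accepted,
   whereas offsets of 518 (not word-aligned) or 1024 (out of range) are
   not, reflecting the offset field of the coprocessor load/store
   instructions.  */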
9586 /* Return TRUE if OP is a memory operand which we can load or store a vector
9587 to/from. TYPE is one of the following values:
9588 0 - Vector load/store (vldr)
9589 1 - Core registers (ldm)
9590 2 - Element/structure loads (vld1)
9593 neon_vector_mem_operand (rtx op, int type)
9597 /* Reject eliminable registers. */
9598 if (! (reload_in_progress || reload_completed)
9599 && ( reg_mentioned_p (frame_pointer_rtx, op)
9600 || reg_mentioned_p (arg_pointer_rtx, op)
9601 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9602 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9603 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9604 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9607 /* Constants are converted into offsets from labels. */
9608 if (GET_CODE (op) != MEM)
9613 if (reload_completed
9614 && (GET_CODE (ind) == LABEL_REF
9615 || (GET_CODE (ind) == CONST
9616 && GET_CODE (XEXP (ind, 0)) == PLUS
9617 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9618 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9621 /* Match: (mem (reg)). */
9622 if (GET_CODE (ind) == REG)
9623 return arm_address_register_rtx_p (ind, 0);
9625 /* Allow post-increment with Neon registers. */
9626 if ((type != 1 && GET_CODE (ind) == POST_INC)
9627 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9628 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9630 /* FIXME: vld1 allows register post-modify. */
9636 && GET_CODE (ind) == PLUS
9637 && GET_CODE (XEXP (ind, 0)) == REG
9638 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9639 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9640 && INTVAL (XEXP (ind, 1)) > -1024
9641 && INTVAL (XEXP (ind, 1)) < 1016
9642 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9648 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
     type.  */
9651 neon_struct_mem_operand (rtx op)
9655 /* Reject eliminable registers. */
9656 if (! (reload_in_progress || reload_completed)
9657 && ( reg_mentioned_p (frame_pointer_rtx, op)
9658 || reg_mentioned_p (arg_pointer_rtx, op)
9659 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9660 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9661 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9662 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9665 /* Constants are converted into offsets from labels. */
9666 if (GET_CODE (op) != MEM)
9671 if (reload_completed
9672 && (GET_CODE (ind) == LABEL_REF
9673 || (GET_CODE (ind) == CONST
9674 && GET_CODE (XEXP (ind, 0)) == PLUS
9675 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9676 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9679 /* Match: (mem (reg)). */
9680 if (GET_CODE (ind) == REG)
9681 return arm_address_register_rtx_p (ind, 0);
9683 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9684 if (GET_CODE (ind) == POST_INC
9685 || GET_CODE (ind) == PRE_DEC)
9686 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9691 /* Return true if X is a register that will be eliminated later on. */
9693 arm_eliminable_register (rtx x)
9695 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9696 || REGNO (x) == ARG_POINTER_REGNUM
9697 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9698 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9701 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9702 coprocessor registers. Otherwise return NO_REGS. */
9705 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9709 if (!TARGET_NEON_FP16)
9710 return GENERAL_REGS;
9711 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9713 return GENERAL_REGS;
9716 /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
9719 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9720 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9721 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9722 || VALID_NEON_STRUCT_MODE (mode)))
9725 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9728 return GENERAL_REGS;
9731 /* Values which must be returned in the most-significant end of the return
     register.  */
9735 arm_return_in_msb (const_tree valtype)
9737 return (TARGET_AAPCS_BASED
9739 && (AGGREGATE_TYPE_P (valtype)
9740 || TREE_CODE (valtype) == COMPLEX_TYPE
9741 || FIXED_POINT_TYPE_P (valtype)));
9744 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9745 Used by the Cirrus Maverick code which has to work around
9746 a hardware bug triggered by such instructions. */
9748 arm_memory_load_p (rtx insn)
9750 rtx body, lhs, rhs;
9752 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9755 body = PATTERN (insn);
9757 if (GET_CODE (body) != SET)
9760 lhs = XEXP (body, 0);
9761 rhs = XEXP (body, 1);
9763 lhs = REG_OR_SUBREG_RTX (lhs);
9765 /* If the destination is not a general purpose
9766 register we do not have to worry. */
9767 if (GET_CODE (lhs) != REG
9768 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9771 /* As well as loads from memory we also have to react
9772 to loads of invalid constants which will be turned
9773 into loads from the minipool. */
9774 return (GET_CODE (rhs) == MEM
9775 || GET_CODE (rhs) == SYMBOL_REF
9776 || note_invalid_constants (insn, -1, false));
9779 /* Return TRUE if INSN is a Cirrus instruction. */
9781 arm_cirrus_insn_p (rtx insn)
9783 enum attr_cirrus attr;
9785 /* get_attr cannot accept USE or CLOBBER. */
9787 || GET_CODE (insn) != INSN
9788 || GET_CODE (PATTERN (insn)) == USE
9789 || GET_CODE (PATTERN (insn)) == CLOBBER)
9792 attr = get_attr_cirrus (insn);
9794 return attr != CIRRUS_NOT;
9797 /* Cirrus reorg for invalid instruction combinations. */
9799 cirrus_reorg (rtx first)
9801 enum attr_cirrus attr;
9802 rtx body = PATTERN (first);
9806 /* Any branch must be followed by 2 non Cirrus instructions. */
9807 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9810 t = next_nonnote_insn (first);
9812 if (arm_cirrus_insn_p (t))
9815 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9819 emit_insn_after (gen_nop (), first);
9824 /* (float (blah)) is in parallel with a clobber. */
9825 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9826 body = XVECEXP (body, 0, 0);
9828 if (GET_CODE (body) == SET)
9830 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9832 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9833 be followed by a non Cirrus insn. */
9834 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9836 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9837 emit_insn_after (gen_nop (), first);
9841 else if (arm_memory_load_p (first))
9843 unsigned int arm_regno;
9845 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9846 ldr/cfmv64hr combination where the Rd field is the same
9847 in both instructions must be split with a non Cirrus
     insn.  */
9854 /* Get Arm register number for ldr insn. */
9855 if (GET_CODE (lhs) == REG)
9856 arm_regno = REGNO (lhs);
9859 gcc_assert (GET_CODE (rhs) == REG);
9860 arm_regno = REGNO (rhs);
9864 first = next_nonnote_insn (first);
9866 if (! arm_cirrus_insn_p (first))
9869 body = PATTERN (first);
9871 /* (float (blah)) is in parallel with a clobber. */
9872 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9873 body = XVECEXP (body, 0, 0);
9875 if (GET_CODE (body) == FLOAT)
9876 body = XEXP (body, 0);
9878 if (get_attr_cirrus (first) == CIRRUS_MOVE
9879 && GET_CODE (XEXP (body, 1)) == REG
9880 && arm_regno == REGNO (XEXP (body, 1)))
9881 emit_insn_after (gen_nop (), first);
9887 /* get_attr cannot accept USE or CLOBBER. */
9889 || GET_CODE (first) != INSN
9890 || GET_CODE (PATTERN (first)) == USE
9891 || GET_CODE (PATTERN (first)) == CLOBBER)
9894 attr = get_attr_cirrus (first);
9896 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9897 must be followed by a non-coprocessor instruction. */
9898 if (attr == CIRRUS_COMPARE)
9902 t = next_nonnote_insn (first);
9904 if (arm_cirrus_insn_p (t))
9907 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9911 emit_insn_after (gen_nop (), first);
9917 /* Return TRUE if X references a SYMBOL_REF. */
9919 symbol_mentioned_p (rtx x)
9924 if (GET_CODE (x) == SYMBOL_REF)
9927 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9928 are constant offsets, not symbols. */
9929 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9932 fmt = GET_RTX_FORMAT (GET_CODE (x));
9934 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9940 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9941 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9944 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9951 /* Return TRUE if X references a LABEL_REF. */
9953 label_mentioned_p (rtx x)
9958 if (GET_CODE (x) == LABEL_REF)
9961 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9962 instruction, but they are constant offsets, not symbols. */
9963 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9966 fmt = GET_RTX_FORMAT (GET_CODE (x));
9967 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9973 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9974 if (label_mentioned_p (XVECEXP (x, i, j)))
9977 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9985 tls_mentioned_p (rtx x)
9987 switch (GET_CODE (x))
9990 return tls_mentioned_p (XEXP (x, 0));
9993 if (XINT (x, 1) == UNSPEC_TLS)
10001 /* Must not copy any rtx that uses a pc-relative address. */
10004 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10006 if (GET_CODE (*x) == UNSPEC
10007 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10008 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10014 arm_cannot_copy_insn_p (rtx insn)
10016 /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
10018 if (recog_memoized (insn) == CODE_FOR_tlscall)
10021 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10025 minmax_code (rtx x)
10027 enum rtx_code code = GET_CODE (x);
10040 gcc_unreachable ();
10044 /* Return 1 if memory locations are adjacent. */
10046 adjacent_mem_locations (rtx a, rtx b)
10048 /* We don't guarantee to preserve the order of these memory refs. */
10049 if (volatile_refs_p (a) || volatile_refs_p (b))
10052 if ((GET_CODE (XEXP (a, 0)) == REG
10053 || (GET_CODE (XEXP (a, 0)) == PLUS
10054 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
10055 && (GET_CODE (XEXP (b, 0)) == REG
10056 || (GET_CODE (XEXP (b, 0)) == PLUS
10057 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
10059 HOST_WIDE_INT val0 = 0, val1 = 0;
10063 if (GET_CODE (XEXP (a, 0)) == PLUS)
10065 reg0 = XEXP (XEXP (a, 0), 0);
10066 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10069 reg0 = XEXP (a, 0);
10071 if (GET_CODE (XEXP (b, 0)) == PLUS)
10073 reg1 = XEXP (XEXP (b, 0), 0);
10074 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10077 reg1 = XEXP (b, 0);
10079 /* Don't accept any offset that will require multiple
10080 instructions to handle, since this would cause the
10081 arith_adjacentmem pattern to output an overlong sequence. */
10082 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10085 /* Don't allow an eliminable register: register elimination can make
10086 the offset too large. */
10087 if (arm_eliminable_register (reg0))
10090 val_diff = val1 - val0;
10094 /* If the target has load delay slots, then there's no benefit
10095 to using an ldm instruction unless the offset is zero and
10096 we are optimizing for size. */
10097 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10098 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10099 && (val_diff == 4 || val_diff == -4));
10102 return ((REGNO (reg0) == REGNO (reg1))
10103 && (val_diff == 4 || val_diff == -4));
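/* E.g. (illustrative) ldr r0, [r4] and ldr r1, [r4, #4] access adjacent
   words from the same base register and can feed the arith_adjacentmem
   pattern; the same pair with offsets 0 and 8 is rejected.  */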
10109 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10110 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10111 instruction. ADD_OFFSET is nonzero if the base address register needs
10112 to be modified with an add instruction before we can use it. */
10115 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10116 int nops, HOST_WIDE_INT add_offset)
10118 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10119 if the offset isn't small enough. The reason 2 ldrs are faster
10120 is because these ARMs are able to do more than one cache access
10121 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10122 whilst the ARM8 has a double bandwidth cache. This means that
10123 these cores can do both an instruction fetch and a data fetch in
10124 a single cycle, so the trick of calculating the address into a
10125 scratch register (one of the result regs) and then doing a load
10126 multiple actually becomes slower (and no smaller in code size).
10127 That is the transformation
10129 ldr rd1, [rbase + offset]
10130 ldr rd2, [rbase + offset + 4]
10134 add rd1, rbase, offset
10135 ldmia rd1, {rd1, rd2}
10137 produces worse code -- '3 cycles + any stalls on rd2' instead of
10138 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10139 access per cycle, the first sequence could never complete in less
10140 than 6 cycles, whereas the ldm sequence would only take 5 and
10141 would make better use of sequential accesses if not hitting the
     cache.
10144 We cheat here and test 'arm_ld_sched' which we currently know to
10145 only be true for the ARM8, ARM9 and StrongARM. If this ever
10146 changes, then the test below needs to be reworked. */
10147 if (nops == 2 && arm_ld_sched && add_offset != 0)
10150 /* XScale has load-store double instructions, but they have stricter
10151 alignment requirements than load-store multiple, so we cannot
10154 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10155 the pipeline until completion.
10163 An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.
10172 Best case ldr will always win. However, the more ldr instructions
10173 we issue, the less likely we are to be able to schedule them well.
10174 Using ldr instructions also increases code size.
10176 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10177 for counts of 3 or 4 regs. */
10178 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10183 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10184 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10185 an array ORDER which describes the sequence to use when accessing the
10186 offsets that produces an ascending order. In this sequence, each
10187 offset must be larger by exactly 4 than the previous one. ORDER[0]
10188 must have been filled in with the lowest offset by the caller.
10189 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10190 we use to verify that ORDER produces an ascending order of registers.
10191 Return true if it was possible to construct such an order, false if
10195 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10196 int *unsorted_regs)
10199 for (i = 1; i < nops; i++)
10203 order[i] = order[i - 1];
10204 for (j = 0; j < nops; j++)
10205 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10207 /* We must find exactly one offset that is higher than the
10208 previous one by 4. */
10209 if (order[i] != order[i - 1])
10213 if (order[i] == order[i - 1])
10215 /* The register numbers must be ascending. */
10216 if (unsorted_regs != NULL
10217 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
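/* E.g. (illustrative) given offsets {4, 12, 0, 8} and order[0] = 2 (the
   index of the lowest offset), the loop above fills in
   order = {2, 0, 3, 1}, visiting the offsets as 0, 4, 8, 12.  */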
10223 /* Used to determine in a peephole whether a sequence of load
10224 instructions can be changed into a load-multiple instruction.
10225 NOPS is the number of separate load instructions we are examining. The
10226 first NOPS entries in OPERANDS are the destination registers, the
10227 next NOPS entries are memory operands. If this function is
10228 successful, *BASE is set to the common base register of the memory
10229 accesses; *LOAD_OFFSET is set to the first memory location's offset
10230 from that base register.
10231 REGS is an array filled in with the destination register numbers.
10232 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10233 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10234 the sequence of registers in REGS matches the loads from ascending memory
10235 locations, and the function verifies that the register numbers are
10236 themselves ascending. If CHECK_REGS is false, the register numbers
10237 are stored in the order they are found in the operands. */
10239 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10240 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10242 int unsorted_regs[MAX_LDM_STM_OPS];
10243 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10244 int order[MAX_LDM_STM_OPS];
10245 rtx base_reg_rtx = NULL;
10249 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10250 easily extended if required. */
10251 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10253 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10255 /* Loop over the operands and check that the memory references are
10256 suitable (i.e. immediate offsets from the same base register). At
10257 the same time, extract the target register, and the memory
10259 for (i = 0; i < nops; i++)
10264 /* Convert a subreg of a mem into the mem itself. */
10265 if (GET_CODE (operands[nops + i]) == SUBREG)
10266 operands[nops + i] = alter_subreg (operands + (nops + i));
10268 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10270 /* Don't reorder volatile memory references; it doesn't seem worth
10271 looking for the case where the order is ok anyway. */
10272 if (MEM_VOLATILE_P (operands[nops + i]))
10275 offset = const0_rtx;
10277 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10278 || (GET_CODE (reg) == SUBREG
10279 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10280 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10281 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10283 || (GET_CODE (reg) == SUBREG
10284 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10285 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10290 base_reg = REGNO (reg);
10291 base_reg_rtx = reg;
10292 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10295 else if (base_reg != (int) REGNO (reg))
10296 /* Not addressed from the same base register. */
10299 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10300 ? REGNO (operands[i])
10301 : REGNO (SUBREG_REG (operands[i])));
10303 /* If it isn't an integer register, or if it overwrites the
10304 base register but isn't the last insn in the list, then
10305 we can't do this. */
10306 if (unsorted_regs[i] < 0
10307 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10308 || unsorted_regs[i] > 14
10309 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10312 unsorted_offsets[i] = INTVAL (offset);
10313 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10317 /* Not a suitable memory address. */
10321 /* All the useful information has now been extracted from the
10322 operands into unsorted_regs and unsorted_offsets; additionally,
10323 order[0] has been set to the lowest offset in the list. Sort
10324 the offsets into order, verifying that they are adjacent, and
10325 check that the register numbers are ascending. */
10326 if (!compute_offset_order (nops, unsorted_offsets, order,
10327 check_regs ? unsorted_regs : NULL))
10331 memcpy (saved_order, order, sizeof order);
10337 for (i = 0; i < nops; i++)
10338 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10340 *load_offset = unsorted_offsets[order[0]];
10344 && !peep2_reg_dead_p (nops, base_reg_rtx))
10347 if (unsorted_offsets[order[0]] == 0)
10348 ldm_case = 1; /* ldmia */
10349 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10350 ldm_case = 2; /* ldmib */
10351 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10352 ldm_case = 3; /* ldmda */
10353 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10354 ldm_case = 4; /* ldmdb */
10355 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10356 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10361 if (!multiple_operation_profitable_p (false, nops,
10363 ? unsorted_offsets[order[0]] : 0))
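/* E.g. (illustrative) four loads from [rb, #0] .. [rb, #12] into
   ascending registers yield ldm_case 1 and can become a single
   "ldmia rb, {...}"; a lowest offset of 4 on ARM gives case 2 (ldmib)
   instead.  */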
10369 /* Used to determine in a peephole whether a sequence of store instructions can
10370 be changed into a store-multiple instruction.
10371 NOPS is the number of separate store instructions we are examining.
10372 NOPS_TOTAL is the total number of instructions recognized by the peephole
     pattern.
10374 The first NOPS entries in OPERANDS are the source registers, the next
10375 NOPS entries are memory operands. If this function is successful, *BASE is
10376 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10377 to the first memory location's offset from that base register. REGS is an
10378 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10379 likewise filled with the corresponding rtx's.
10380 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10381 numbers to an ascending order of stores.
10382 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10383 from ascending memory locations, and the function verifies that the register
10384 numbers are themselves ascending. If CHECK_REGS is false, the register
10385 numbers are stored in the order they are found in the operands. */
10387 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10388 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10389 HOST_WIDE_INT *load_offset, bool check_regs)
10391 int unsorted_regs[MAX_LDM_STM_OPS];
10392 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10393 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10394 int order[MAX_LDM_STM_OPS];
10396 rtx base_reg_rtx = NULL;
10399 /* Write back of base register is currently only supported for Thumb 1. */
10400 int base_writeback = TARGET_THUMB1;
10402 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10403 easily extended if required. */
10404 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10406 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10408 /* Loop over the operands and check that the memory references are
10409 suitable (i.e. immediate offsets from the same base register). At
10410 the same time, extract the target register, and the memory
10412 for (i = 0; i < nops; i++)
10417 /* Convert a subreg of a mem into the mem itself. */
10418 if (GET_CODE (operands[nops + i]) == SUBREG)
10419 operands[nops + i] = alter_subreg (operands + (nops + i));
10421 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10423 /* Don't reorder volatile memory references; it doesn't seem worth
10424 looking for the case where the order is ok anyway. */
10425 if (MEM_VOLATILE_P (operands[nops + i]))
10428 offset = const0_rtx;
10430 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10431 || (GET_CODE (reg) == SUBREG
10432 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10433 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10434 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10436 || (GET_CODE (reg) == SUBREG
10437 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10438 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10441 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10442 ? operands[i] : SUBREG_REG (operands[i]));
10443 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10447 base_reg = REGNO (reg);
10448 base_reg_rtx = reg;
10449 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10452 else if (base_reg != (int) REGNO (reg))
10453 /* Not addressed from the same base register. */
10456 /* If it isn't an integer register, then we can't do this. */
10457 if (unsorted_regs[i] < 0
10458 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10459 /* The effects are unpredictable if the base register is
10460 both updated and stored. */
10461 || (base_writeback && unsorted_regs[i] == base_reg)
10462 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10463 || unsorted_regs[i] > 14)
10466 unsorted_offsets[i] = INTVAL (offset);
10467 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10471 /* Not a suitable memory address. */
10475 /* All the useful information has now been extracted from the
10476 operands into unsorted_regs and unsorted_offsets; additionally,
10477 order[0] has been set to the lowest offset in the list. Sort
10478 the offsets into order, verifying that they are adjacent, and
10479 check that the register numbers are ascending. */
10480 if (!compute_offset_order (nops, unsorted_offsets, order,
10481 check_regs ? unsorted_regs : NULL))
10485 memcpy (saved_order, order, sizeof order);
10491 for (i = 0; i < nops; i++)
10493 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10495 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10498 *load_offset = unsorted_offsets[order[0]];
10502 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10505 if (unsorted_offsets[order[0]] == 0)
10506 stm_case = 1; /* stmia */
10507 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10508 stm_case = 2; /* stmib */
10509 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10510 stm_case = 3; /* stmda */
10511 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10512 stm_case = 4; /* stmdb */
10516 if (!multiple_operation_profitable_p (false, nops, 0))
10522 /* Routines for use in generating RTL. */
10524 /* Generate a load-multiple instruction. COUNT is the number of loads in
10525 the instruction; REGS and MEMS are arrays containing the operands.
10526 BASEREG is the base register to be used in addressing the memory operands.
10527 WBACK_OFFSET is nonzero if the instruction should update the base
     register.  */
10531 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10532 HOST_WIDE_INT wback_offset)
10537 if (!multiple_operation_profitable_p (false, count, 0))
10543 for (i = 0; i < count; i++)
10544 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10546 if (wback_offset != 0)
10547 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10549 seq = get_insns ();
10555 result = gen_rtx_PARALLEL (VOIDmode,
10556 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10557 if (wback_offset != 0)
10559 XVECEXP (result, 0, 0)
10560 = gen_rtx_SET (VOIDmode, basereg,
10561 plus_constant (basereg, wback_offset));
10566 for (j = 0; i < count; i++, j++)
10567 XVECEXP (result, 0, i)
10568 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
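/* The RTL produced here for a two-register load with writeback looks
   roughly like (illustrative):
     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
                (set (reg r0) (mem (reg rb)))
                (set (reg r1) (mem (plus (reg rb) (const_int 4))))])  */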
10573 /* Generate a store-multiple instruction. COUNT is the number of stores in
10574 the instruction; REGS and MEMS are arrays containing the operands.
10575 BASEREG is the base register to be used in addressing the memory operands.
10576 WBACK_OFFSET is nonzero if the instruction should update the base
     register.  */
10580 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10581 HOST_WIDE_INT wback_offset)
10586 if (GET_CODE (basereg) == PLUS)
10587 basereg = XEXP (basereg, 0);
10589 if (!multiple_operation_profitable_p (false, count, 0))
10595 for (i = 0; i < count; i++)
10596 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10598 if (wback_offset != 0)
10599 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10601 seq = get_insns ();
10607 result = gen_rtx_PARALLEL (VOIDmode,
10608 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10609 if (wback_offset != 0)
10611 XVECEXP (result, 0, 0)
10612 = gen_rtx_SET (VOIDmode, basereg,
10613 plus_constant (basereg, wback_offset));
10618 for (j = 0; i < count; i++, j++)
10619 XVECEXP (result, 0, i)
10620 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10625 /* Generate either a load-multiple or a store-multiple instruction. This
10626 function can be used in situations where we can start with a single MEM
10627 rtx and adjust its address upwards.
10628 COUNT is the number of operations in the instruction, not counting a
10629 possible update of the base register. REGS is an array containing the
     register numbers to be used in the instruction.
10631 BASEREG is the base register to be used in addressing the memory operands,
10632 which are constructed from BASEMEM.
10633 WRITE_BACK specifies whether the generated instruction should include an
10634 update of the base register.
10635 OFFSETP is used to pass an offset to and from this function; this offset
10636 is not used when constructing the address (instead BASEMEM should have an
10637 appropriate offset in its address), it is used only for setting
10638 MEM_OFFSET. It is updated only if WRITE_BACK is true.  */
10641 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10642 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10644 rtx mems[MAX_LDM_STM_OPS];
10645 HOST_WIDE_INT offset = *offsetp;
10648 gcc_assert (count <= MAX_LDM_STM_OPS);
10650 if (GET_CODE (basereg) == PLUS)
10651 basereg = XEXP (basereg, 0);
10653 for (i = 0; i < count; i++)
10655 rtx addr = plus_constant (basereg, i * 4);
10656 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10664 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10665 write_back ? 4 * count : 0);
10667 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10668 write_back ? 4 * count : 0);
10672 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10673 rtx basemem, HOST_WIDE_INT *offsetp)
10675 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10680 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10681 rtx basemem, HOST_WIDE_INT *offsetp)
10683 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10687 /* Called from a peephole2 expander to turn a sequence of loads into an
10688 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10689 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10690 is true if we can reorder the registers because they are used commutatively
     subsequently.
10692 Returns true iff we could generate a new instruction. */
10695 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10697 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10698 rtx mems[MAX_LDM_STM_OPS];
10699 int i, j, base_reg;
10701 HOST_WIDE_INT offset;
10702 int write_back = FALSE;
10706 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10707 &base_reg, &offset, !sort_regs);
10713 for (i = 0; i < nops - 1; i++)
10714 for (j = i + 1; j < nops; j++)
10715 if (regs[i] > regs[j])
10721 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10725 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10726 gcc_assert (ldm_case == 1 || ldm_case == 5);
10732 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10733 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10735 if (!TARGET_THUMB1)
10737 base_reg = regs[0];
10738 base_reg_rtx = newbase;
10742 for (i = 0; i < nops; i++)
10744 addr = plus_constant (base_reg_rtx, offset + i * 4);
10745 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10748 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10749 write_back ? offset + i * 4 : 0));
10753 /* Called from a peephole2 expander to turn a sequence of stores into an
10754 STM instruction. OPERANDS are the operands found by the peephole matcher;
10755 NOPS indicates how many separate stores we are trying to combine.
10756 Returns true iff we could generate a new instruction. */
10759 gen_stm_seq (rtx *operands, int nops)
10762 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10763 rtx mems[MAX_LDM_STM_OPS];
10766 HOST_WIDE_INT offset;
10767 int write_back = FALSE;
10770 bool base_reg_dies;
10772 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10773 mem_order, &base_reg, &offset, true);
10778 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10780 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10783 gcc_assert (base_reg_dies);
10789 gcc_assert (base_reg_dies);
10790 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10794 addr = plus_constant (base_reg_rtx, offset);
10796 for (i = 0; i < nops; i++)
10798 addr = plus_constant (base_reg_rtx, offset + i * 4);
10799 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10802 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10803 write_back ? offset + i * 4 : 0));
10807 /* Called from a peephole2 expander to turn a sequence of stores that are
10808 preceded by constant loads into an STM instruction. OPERANDS are the
10809 operands found by the peephole matcher; NOPS indicates how many
10810 separate stores we are trying to combine; there are 2 * NOPS
10811 instructions in the peephole.
10812 Returns true iff we could generate a new instruction. */
10815 gen_const_stm_seq (rtx *operands, int nops)
10817 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10818 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10819 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10820 rtx mems[MAX_LDM_STM_OPS];
10823 HOST_WIDE_INT offset;
10824 int write_back = FALSE;
10827 bool base_reg_dies;
10829 HARD_REG_SET allocated;
10831 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10832 mem_order, &base_reg, &offset, false);
10837 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10839 /* If the same register is used more than once, try to find a free
10841 CLEAR_HARD_REG_SET (allocated);
10842 for (i = 0; i < nops; i++)
10844 for (j = i + 1; j < nops; j++)
10845 if (regs[i] == regs[j])
10847 rtx t = peep2_find_free_register (0, nops * 2,
10848 TARGET_THUMB1 ? "l" : "r",
10849 SImode, &allocated);
10853 regs[i] = REGNO (t);
10857 /* Compute an ordering that maps the register numbers to an ascending
10860 for (i = 0; i < nops; i++)
10861 if (regs[i] < regs[reg_order[0]])
10864 for (i = 1; i < nops; i++)
10866 int this_order = reg_order[i - 1];
10867 for (j = 0; j < nops; j++)
10868 if (regs[j] > regs[reg_order[i - 1]]
10869 && (this_order == reg_order[i - 1]
10870 || regs[j] < regs[this_order]))
10872 reg_order[i] = this_order;
10875 /* Ensure that registers that must be live after the instruction end
10876 up with the correct value. */
10877 for (i = 0; i < nops; i++)
10879 int this_order = reg_order[i];
10880 if ((this_order != mem_order[i]
10881 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10882 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10886 /* Load the constants. */
10887 for (i = 0; i < nops; i++)
10889 rtx op = operands[2 * nops + mem_order[i]];
10890 sorted_regs[i] = regs[reg_order[i]];
10891 emit_move_insn (reg_rtxs[reg_order[i]], op);
10894 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10896 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10899 gcc_assert (base_reg_dies);
10905 gcc_assert (base_reg_dies);
10906 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10910 addr = plus_constant (base_reg_rtx, offset);
10912 for (i = 0; i < nops; i++)
10914 addr = plus_constant (base_reg_rtx, offset + i * 4);
10915 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10918 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10919 write_back ? offset + i * 4 : 0));
10923 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10924 unaligned copies on processors which support unaligned semantics for those
10925 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10926 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10927 An interleave factor of 1 (the minimum) will perform no interleaving.
10928 Load/store multiple are used for aligned addresses where possible. */
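/* E.g. (illustrative) with INTERLEAVE_FACTOR == 2 each iteration of the
   main loop below loads two words into regs[0] and regs[1] before storing
   either, so the second load can issue while the first is still in
   flight.  */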
10931 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10932 HOST_WIDE_INT length,
10933 unsigned int interleave_factor)
10935 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10936 int *regnos = XALLOCAVEC (int, interleave_factor);
10937 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10938 HOST_WIDE_INT i, j;
10939 HOST_WIDE_INT remaining = length, words;
10940 rtx halfword_tmp = NULL, byte_tmp = NULL;
10942 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10943 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10944 HOST_WIDE_INT srcoffset, dstoffset;
10945 HOST_WIDE_INT src_autoinc, dst_autoinc;
10948 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10950 /* Use hard registers if we have aligned source or destination so we can use
10951 load/store multiple with contiguous registers. */
10952 if (dst_aligned || src_aligned)
10953 for (i = 0; i < interleave_factor; i++)
10954 regs[i] = gen_rtx_REG (SImode, i);
10956 for (i = 0; i < interleave_factor; i++)
10957 regs[i] = gen_reg_rtx (SImode);
10959 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10960 src = copy_addr_to_reg (XEXP (srcbase, 0));
10962 srcoffset = dstoffset = 0;
10964 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10965 For copying the last bytes we want to subtract this offset again. */
10966 src_autoinc = dst_autoinc = 0;
10968 for (i = 0; i < interleave_factor; i++)
10971 /* Copy BLOCK_SIZE_BYTES chunks. */
10973 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10976 if (src_aligned && interleave_factor > 1)
10978 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10979 TRUE, srcbase, &srcoffset));
10980 src_autoinc += UNITS_PER_WORD * interleave_factor;
10984 for (j = 0; j < interleave_factor; j++)
10986 addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
10988 mem = adjust_automodify_address (srcbase, SImode, addr,
10989 srcoffset + j * UNITS_PER_WORD);
10990 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10992 srcoffset += block_size_bytes;
10996 if (dst_aligned && interleave_factor > 1)
10998 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10999 TRUE, dstbase, &dstoffset));
11000 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11004 for (j = 0; j < interleave_factor; j++)
11006 addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
11008 mem = adjust_automodify_address (dstbase, SImode, addr,
11009 dstoffset + j * UNITS_PER_WORD);
11010 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11012 dstoffset += block_size_bytes;
11015 remaining -= block_size_bytes;
11018 /* Copy any whole words left (note these aren't interleaved with any
11019 subsequent halfword/byte load/stores in the interests of simplicity). */
11021 words = remaining / UNITS_PER_WORD;
11023 gcc_assert (words < interleave_factor);
11025 if (src_aligned && words > 1)
11027 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11029 src_autoinc += UNITS_PER_WORD * words;
11033 for (j = 0; j < words; j++)
11035 addr = plus_constant (src,
11036 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11037 mem = adjust_automodify_address (srcbase, SImode, addr,
11038 srcoffset + j * UNITS_PER_WORD);
11039 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11041 srcoffset += words * UNITS_PER_WORD;
11044 if (dst_aligned && words > 1)
11046 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11048 dst_autoinc += words * UNITS_PER_WORD;
11052 for (j = 0; j < words; j++)
11054 addr = plus_constant (dst,
11055 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11056 mem = adjust_automodify_address (dstbase, SImode, addr,
11057 dstoffset + j * UNITS_PER_WORD);
11058 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11060 dstoffset += words * UNITS_PER_WORD;
11063 remaining -= words * UNITS_PER_WORD;
11065 gcc_assert (remaining < 4);
11067 /* Copy a halfword if necessary. */
11069 if (remaining >= 2)
11071 halfword_tmp = gen_reg_rtx (SImode);
11073 addr = plus_constant (src, srcoffset - src_autoinc);
11074 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11075 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11077 /* Either write out immediately, or delay until we've loaded the last
11078 byte, depending on interleave factor. */
11079 if (interleave_factor == 1)
11081 addr = plus_constant (dst, dstoffset - dst_autoinc);
11082 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11083 emit_insn (gen_unaligned_storehi (mem,
11084 gen_lowpart (HImode, halfword_tmp)));
11085 halfword_tmp = NULL;
11093 gcc_assert (remaining < 2);
11095 /* Copy last byte. */
11097 if ((remaining & 1) != 0)
11099 byte_tmp = gen_reg_rtx (SImode);
11101 addr = plus_constant (src, srcoffset - src_autoinc);
11102 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11103 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11105 if (interleave_factor == 1)
11107 addr = plus_constant (dst, dstoffset - dst_autoinc);
11108 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11109 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11118 /* Store last halfword if we haven't done so already. */
11122 addr = plus_constant (dst, dstoffset - dst_autoinc);
11123 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11124 emit_insn (gen_unaligned_storehi (mem,
11125 gen_lowpart (HImode, halfword_tmp)));
11129 /* Likewise for last byte. */
11133 addr = plus_constant (dst, dstoffset - dst_autoinc);
11134 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11135 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11139 gcc_assert (remaining == 0 && srcoffset == dstoffset);
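
/* [Editor's illustration, not part of GCC.]  A standalone sketch of how the
   routine above decomposes a LENGTH-byte copy: full chunks of
   INTERLEAVE_FACTOR words, then leftover whole words, then at most one
   halfword and one byte.  Names are hypothetical; the word size is assumed
   to be 4 bytes, as on ARM.  */

static void
example_copy_decomposition (long length, long interleave_factor,
			    long *chunks, long *words,
			    long *halfwords, long *bytes)
{
  long block_size = interleave_factor * 4;
  long remaining = length;

  *chunks = remaining / block_size;	/* Interleaved word chunks.  */
  remaining -= *chunks * block_size;

  *words = remaining / 4;		/* Always < INTERLEAVE_FACTOR.  */
  remaining -= *words * 4;

  *halfwords = remaining >= 2;		/* At most one halfword...  */
  remaining -= *halfwords * 2;

  *bytes = remaining;			/* ...and at most one final byte.  */
}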
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
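
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   loop-bound arithmetic above: the loop copies LENGTH rounded down to a
   multiple of BYTES_PER_ITER, and the remainder is copied straight-line
   after the loop.  Returns the iteration count; the name is hypothetical.  */

static long
example_loop_split (long length, long bytes_per_iter, long *leftover)
{
  *leftover = length % bytes_per_iter;
  return (length - *leftover) / bytes_per_iter;
}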
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 tricky.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (GET_CODE (operands[2]) != CONST_INT
      || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i+=4)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (dst, last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
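
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   word/byte bookkeeping in arm_gen_movmemqi: for an N-byte copy, IN words
   are loaded (rounded up, so trailing bytes arrive in a register), OUT
   whole words are stored, and LAST_BYTES trailing bytes are stored with
   halfword/byte moves.  The name is hypothetical.  */

static void
example_movmem_split (long n, long *in_words, long *out_words,
		      long *last_bytes)
{
  *in_words = (n + 3) / 4;	/* Like ARM_NUM_INTS: round up to words.  */
  *out_words = n / 4;		/* Whole words stored with word stores.  */
  *last_bytes = n & 3;		/* Trailing bytes, 0..3.  */
}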
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
	    return CCFPmode;
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
	return CCmode;

      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
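
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   equality trick used above for DImode comparisons on Thumb: X == Y exactly
   when (X ^ Y) == 0, so the comparison is lowered to an XOR followed by a
   compare against zero.  The name is hypothetical.  */

static int
example_eq_via_xor (unsigned long long x, unsigned long long y)
{
  /* Identical operands XOR to zero; any differing bit survives.  */
  return (x ^ y) == 0;
}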
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
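
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   offset-splitting arithmetic above: OFFSET is split into HI + LO, where LO
   fits the +/-4095 immediate range of a byte load/store and HI is added to
   the base register first.  LO is trimmed when it equals the maximum so
   that LO+1 (used for the second byte) stays in range.  The 32-bit
   wrap-around handling of the original is omitted here for clarity; the
   name is hypothetical.  */

static long
example_split_hwi_offset (long offset, long *hi)
{
  long lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);

  /* If LO is the maximum offset, accessing LO+1 would be out of range;
     push the top bit into the HI part instead.  */
  if (lo == 4095)
    lo &= 0x7ff;

  *hi = offset - lo;		/* By construction, HI + LO == OFFSET.  */
  return lo;
}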
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    {
	      rtx tmp = scratch;
	      scratch = base_plus;
	      base_plus = tmp;
	    }
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		{
		  rtx tmp = scratch;
		  scratch = base_plus;
		  base_plus = tmp;
		}
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (enum machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
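
/* [Editor's illustration, not part of GCC.]  A standalone restatement of
   the padding rule above: entries smaller than a word still occupy a full
   4-byte pool slot, so for mode sizes 1, 2, 4 and 8 the fix sizes are 4, 4,
   4 and 8.  The name is hypothetical.  */

static int
example_minipool_fix_size (int mode_size_bytes)
{
  return mode_size_bytes >= 4 ? mode_size_bytes : 4;
}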
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (jump_to_label_p (insn)
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
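
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   size computation above: NENTRIES dispatch entries of ENTRY_SIZE bytes
   (1 for TBB, 2 for TBH, 4 for word tables); TBB tables are rounded up to a
   halfword boundary, and word tables on Thumb get 2 bytes of alignment.
   The name is hypothetical.  */

static long
example_jump_table_bytes (long nentries, long entry_size, int is_thumb)
{
  long size = nentries * entry_size;

  if (entry_size == 1)
    size = (size + 1) & ~1L;	/* Round TBB table up to a halfword.  */
  else if (entry_size == 4 && is_thumb)
    size += 2;			/* Alignment padding on Thumb.  */

  return size;
}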
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
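
/* [Editor's illustration, not part of GCC.]  A standalone restatement of
   the bound above: if a label is aligned to 1 << ALIGN_LOG bytes and
   addresses advance in minimum-instruction-size steps (2 on Thumb, 4 on
   ARM), the worst-case padding is the alignment minus that step.  The name
   is hypothetical.  */

static long
example_max_label_padding (int align_log, int thumb)
{
  long align = 1L << align_log;
  long min_insn = thumb ? 2 : 4;

  return align > min_insn ? align - min_insn : 0;
}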
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
/* Fill in the offsets for minipool entries.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
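
/* [Editor's illustration, not part of GCC.]  A standalone sketch of the
   classic ARM-mode immediate test that const_ok_for_arm performs for the
   two routines above: a data-processing immediate is an 8-bit value rotated
   right by an even amount within 32 bits.  Assumes a 32-bit unsigned int;
   the name is hypothetical.  */

static int
example_arm_immediate_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right by ROT; if the
	 result fits in 8 bits, X is encodable.  ROT == 0 is handled
	 separately to avoid the undefined 32-bit shift.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if (v <= 0xff)
	return 1;
    }
  return 0;
}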
13176 /* Scan INSN and note any of its operands that need fixing.
13177 If DO_PUSHES is false we do not actually push any of the fixups
13178 needed. The function returns TRUE if any fixups were needed/pushed.
13179 This is used by arm_memory_load_p() which needs to know about loads
13180 of constants that will be converted into minipool loads. */
13182 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13184 bool result = false;
13187 extract_insn (insn);
13189 if (!constrain_operands (1))
13190 fatal_insn_not_found (insn);
13192 if (recog_data.n_alternatives == 0)
13195 /* Fill in recog_op_alt with information about the constraints of this insn. */
13197 preprocess_constraints ();
13199 for (opno = 0; opno < recog_data.n_operands; opno++)
13201 /* Things we need to fix can only occur in inputs. */
13202 if (recog_data.operand_type[opno] != OP_IN)
13205 /* If this alternative is a memory reference, then any mention
13206 of constants in this alternative is really to fool reload
13207 into allowing us to accept one there. We need to fix them up
13208 now so that we output the right code. */
13209 if (recog_op_alt[opno][which_alternative].memory_ok)
13211 rtx op = recog_data.operand[opno];
13213 if (CONSTANT_P (op))
13216 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13217 recog_data.operand_mode[opno], op);
13220 else if (GET_CODE (op) == MEM
13221 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13222 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13226 rtx cop = avoid_constant_pool_reference (op);
13228 /* Casting the address of something to a mode narrower
13229 than a word can cause avoid_constant_pool_reference()
13230 to return the pool reference itself. That's no good to
13231 us here. Let's just hope that we can use the
13232 constant pool value directly. */
13234 cop = get_pool_constant (XEXP (op, 0));
13236 push_minipool_fix (insn, address,
13237 recog_data.operand_loc[opno],
13238 recog_data.operand_mode[opno], cop);
13249 /* Convert instructions to their cc-clobbering variant if possible, since
13250 that allows us to use smaller encodings. */
13253 thumb2_reorg (void)
13258 INIT_REG_SET (&live);
13260 /* We are freeing block_for_insn in the toplev to keep compatibility
13261 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13262 compute_bb_for_insn ();
13269 COPY_REG_SET (&live, DF_LR_OUT (bb));
13270 df_simulate_initialize_backwards (bb, &live);
13271 FOR_BB_INSNS_REVERSE (bb, insn)
13273 if (NONJUMP_INSN_P (insn)
13274 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13276 rtx pat = PATTERN (insn);
13277 if (GET_CODE (pat) == SET
13278 && low_register_operand (XEXP (pat, 0), SImode)
13279 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13280 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13281 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13283 rtx dst = XEXP (pat, 0);
13284 rtx src = XEXP (pat, 1);
13285 rtx op0 = XEXP (src, 0);
13286 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13287 ? XEXP (src, 1) : NULL);
13289 if (rtx_equal_p (dst, op0)
13290 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13292 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13293 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13294 rtvec vec = gen_rtvec (2, pat, clobber);
13296 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13297 INSN_CODE (insn) = -1;
13299 /* We can also handle a commutative operation where the
13300 second operand matches the destination. */
13301 else if (op1 && rtx_equal_p (dst, op1))
13303 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13304 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13307 src = copy_rtx (src);
13308 XEXP (src, 0) = op1;
13309 XEXP (src, 1) = op0;
13310 pat = gen_rtx_SET (VOIDmode, dst, src);
13311 vec = gen_rtvec (2, pat, clobber);
13312 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13313 INSN_CODE (insn) = -1;
13318 if (NONDEBUG_INSN_P (insn))
13319 df_simulate_one_insn_backwards (bb, insn, &live);
13323 CLEAR_REG_SET (&live);
13326 /* GCC puts the pool in the wrong place for ARM, since we can only
13327 load addresses a limited distance around the pc. We do some
13328 special munging to move the constant pool values to the correct
13329 point in the code. */
13334 HOST_WIDE_INT address = 0;
13340 minipool_fix_head = minipool_fix_tail = NULL;
13342 /* The first insn must always be a note, or the code below won't
13343 scan it properly. */
13344 insn = get_insns ();
13345 gcc_assert (GET_CODE (insn) == NOTE);
13348 /* Scan all the insns and record the operands that will need fixing. */
13349 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13351 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13352 && (arm_cirrus_insn_p (insn)
13353 || GET_CODE (insn) == JUMP_INSN
13354 || arm_memory_load_p (insn)))
13355 cirrus_reorg (insn);
13357 if (GET_CODE (insn) == BARRIER)
13358 push_minipool_barrier (insn, address);
13359 else if (INSN_P (insn))
13363 note_invalid_constants (insn, address, true);
13364 address += get_attr_length (insn);
13366 /* If the insn is a vector jump, add the size of the table
13367 and skip the table. */
13368 if ((table = is_jump_table (insn)) != NULL)
13370 address += get_jump_table_size (table);
13374 else if (LABEL_P (insn))
13375 /* Add the worst-case padding due to alignment. We don't add
13376 the _current_ padding because the minipool insertions
13377 themselves might change it. */
13378 address += get_label_padding (insn);
13381 fix = minipool_fix_head;
13383 /* Now scan the fixups and perform the required changes. */
13388 Mfix * last_added_fix;
13389 Mfix * last_barrier = NULL;
13392 /* Skip any further barriers before the next fix. */
13393 while (fix && GET_CODE (fix->insn) == BARRIER)
13396 /* No more fixes. */
13400 last_added_fix = NULL;
13402 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13404 if (GET_CODE (ftmp->insn) == BARRIER)
13406 if (ftmp->address >= minipool_vector_head->max_address)
13409 last_barrier = ftmp;
13411 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13414 last_added_fix = ftmp; /* Keep track of the last fix added. */
13417 /* If we found a barrier, drop back to that; any fixes that we
13418 could have reached but come after the barrier will now go in
13419 the next mini-pool. */
13420 if (last_barrier != NULL)
13422 /* Reduce the refcount for those fixes that won't go into this pool after all. */
13424 for (fdel = last_barrier->next;
13425 fdel && fdel != ftmp;
13428 fdel->minipool->refcount--;
13429 fdel->minipool = NULL;
13432 ftmp = last_barrier;
13436 /* ftmp is the first fix that we can't fit into this pool and
13437 there are no natural barriers that we could use. Insert a
13438 new barrier in the code somewhere between the previous
13439 fix and this one, and arrange to jump around it. */
13440 HOST_WIDE_INT max_address;
13442 /* The last item on the list of fixes must be a barrier, so
13443 we can never run off the end of the list of fixes without
13444 last_barrier being set. */
13447 max_address = minipool_vector_head->max_address;
13448 /* Check that there isn't another fix that is in range that
13449 we couldn't fit into this pool because the pool was
13450 already too large: we need to put the pool before such an
13451 instruction. The pool itself may come just after the
13452 fix because create_fix_barrier also allows space for a
13453 jump instruction. */
13454 if (ftmp->address < max_address)
13455 max_address = ftmp->address + 1;
13457 last_barrier = create_fix_barrier (last_added_fix, max_address);
13460 assign_minipool_offsets (last_barrier);
13464 if (GET_CODE (ftmp->insn) != BARRIER
13465 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13472 /* Scan over the fixes we have identified for this pool, fixing them
13473 up and adding the constants to the pool itself. */
13474 for (this_fix = fix; this_fix && ftmp != this_fix;
13475 this_fix = this_fix->next)
13476 if (GET_CODE (this_fix->insn) != BARRIER)
13479 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13480 minipool_vector_label),
13481 this_fix->minipool->offset);
13482 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13485 dump_minipool (last_barrier->insn);
13489 /* From now on we must synthesize any constants that we can't handle
13490 directly. This can happen if the RTL gets split during final
13491 instruction generation. */
13492 after_arm_reorg = 1;
13494 /* Free the minipool memory. */
13495 obstack_free (&minipool_obstack, minipool_startobj);
13498 /* Routines to output assembly language. */
13500 /* If the rtx is the correct value then return the string of the number.
13501 In this way we can ensure that valid double constants are generated even
13502 when cross compiling. */
13504 fp_immediate_constant (rtx x)
13509 if (!fp_consts_inited)
13512 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13513 for (i = 0; i < 8; i++)
13514 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13515 return strings_fp[i];
13517 gcc_unreachable ();
13520 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13521 static const char *
13522 fp_const_from_val (REAL_VALUE_TYPE *r)
13526 if (!fp_consts_inited)
13529 for (i = 0; i < 8; i++)
13530 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13531 return strings_fp[i];
13533 gcc_unreachable ();
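/* Editorial note: assuming the table set up by init_fp_table holds the
   eight classic FPA immediates (0, 1, 2, 3, 4, 5, 0.5 and 10), a call
   such as fp_const_from_val (&dconst1) would return the string "1".  */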
13536 /* Output the operands of a LDM/STM instruction to STREAM.
13537 MASK is the ARM register set mask of which only bits 0-15 are important.
13538 REG is the base register, either the frame pointer or the stack pointer.
13539 INSTR is the possibly suffixed load or store instruction.
13540 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13543 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13544 unsigned long mask, int rfe)
13547 bool not_first = FALSE;
13549 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13550 fputc ('\t', stream);
13551 asm_fprintf (stream, instr, reg);
13552 fputc ('{', stream);
13554 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13555 if (mask & (1 << i))
13558 fprintf (stream, ", ");
13560 asm_fprintf (stream, "%r", i);
13565 fprintf (stream, "}^\n");
13567 fprintf (stream, "}\n");
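/* Example (an editorial sketch, not verbatim from a caller): a call like
     print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, mask, 1);
   with MASK covering r4, r5 and pc might emit
     ldmfd	sp!, {r4, r5, pc}^
   where the trailing '^' comes from the nonzero RFE argument.  */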
13571 /* Output a FLDMD instruction to STREAM.
13572 BASE is the register containing the address.
13573 REG and COUNT specify the register range.
13574 Extra registers may be added to avoid hardware bugs.
13576 We output FLDMD even for ARMv5 VFP implementations. Although
13577 FLDMD is technically not supported until ARMv6, it is believed
13578 that all VFP implementations support its use in this context. */
13581 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13585 /* Work around the ARM10 VFPr1 bug. */
13586 if (count == 2 && !arm_arch6)
13593 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13594 load into multiple parts if we have to handle more than 16 registers. */
13597 vfp_output_fldmd (stream, base, reg, 16);
13598 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13602 fputc ('\t', stream);
13603 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13605 for (i = reg; i < reg + count; i++)
13608 fputs (", ", stream);
13609 asm_fprintf (stream, "d%d", i);
13611 fputs ("}\n", stream);
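/* Editorial example: on an ARMv6 or later core,
     vfp_output_fldmd (f, SP_REGNUM, 8, 2);
   would emit
     fldmfdd	sp!, {d8, d9}
   while on a pre-v6 VFP the ARM10 VFPr1 workaround above widens the
   range to {d8, d9, d10}.  */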
13616 /* Output the assembly for a store multiple. */
13619 vfp_output_fstmd (rtx * operands)
13626 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13627 p = strlen (pattern);
13629 gcc_assert (GET_CODE (operands[1]) == REG);
13631 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13632 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13634 p += sprintf (&pattern[p], ", d%d", base + i);
13636 strcpy (&pattern[p], "}");
13638 output_asm_insn (pattern, operands);
13643 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13644 number of bytes pushed. */
13647 vfp_emit_fstmd (int base_reg, int count)
13654 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13655 register pairs are stored by a store multiple insn. We avoid this
13656 by pushing an extra pair. */
13657 if (count == 2 && !arm_arch6)
13659 if (base_reg == LAST_VFP_REGNUM - 3)
13664 /* FSTMD may not store more than 16 doubleword registers at once. Split
13665 larger stores into multiple parts (up to a maximum of two, in practice). */
13670 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
13672 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13673 saved += vfp_emit_fstmd (base_reg, 16);
13677 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13678 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13680 reg = gen_rtx_REG (DFmode, base_reg);
13683 XVECEXP (par, 0, 0)
13684 = gen_rtx_SET (VOIDmode,
13687 gen_rtx_PRE_MODIFY (Pmode,
13690 (stack_pointer_rtx,
13693 gen_rtx_UNSPEC (BLKmode,
13694 gen_rtvec (1, reg),
13695 UNSPEC_PUSH_MULT));
13697 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13698 plus_constant (stack_pointer_rtx, -(count * 8)));
13699 RTX_FRAME_RELATED_P (tmp) = 1;
13700 XVECEXP (dwarf, 0, 0) = tmp;
13702 tmp = gen_rtx_SET (VOIDmode,
13703 gen_frame_mem (DFmode, stack_pointer_rtx),
13705 RTX_FRAME_RELATED_P (tmp) = 1;
13706 XVECEXP (dwarf, 0, 1) = tmp;
13708 for (i = 1; i < count; i++)
13710 reg = gen_rtx_REG (DFmode, base_reg);
13712 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13714 tmp = gen_rtx_SET (VOIDmode,
13715 gen_frame_mem (DFmode,
13716 plus_constant (stack_pointer_rtx,
13719 RTX_FRAME_RELATED_P (tmp) = 1;
13720 XVECEXP (dwarf, 0, i + 1) = tmp;
13723 par = emit_insn (par);
13724 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13725 RTX_FRAME_RELATED_P (par) = 1;
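/* Editorial sketch of the net effect: with base_reg == 16 (i.e. d8, since
   each D register occupies two internal register numbers) and count == 2
   on an ARMv6+ core, this emits a single push of {d8, d9}, drops the
   stack pointer by 16 bytes and returns 16.  */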
13730 /* Emit a call instruction with pattern PAT. ADDR is the address of
13731 the call target. */
13734 arm_emit_call_insn (rtx pat, rtx addr)
13738 insn = emit_call_insn (pat);
13740 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13741 If the call might use such an entry, add a use of the PIC register
13742 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13743 if (TARGET_VXWORKS_RTP
13745 && GET_CODE (addr) == SYMBOL_REF
13746 && (SYMBOL_REF_DECL (addr)
13747 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13748 : !SYMBOL_REF_LOCAL_P (addr)))
13750 require_pic_register ();
13751 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13755 /* Output a 'call' insn. */
13757 output_call (rtx *operands)
13759 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13761 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13762 if (REGNO (operands[0]) == LR_REGNUM)
13764 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13765 output_asm_insn ("mov%?\t%0, %|lr", operands);
13768 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13770 if (TARGET_INTERWORK || arm_arch4t)
13771 output_asm_insn ("bx%?\t%0", operands);
13773 output_asm_insn ("mov%?\t%|pc, %0", operands);
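/* Editorial example: for a pre-ARMv5 interworking target with the call
   address in r2, the sequence emitted above is
     mov	lr, pc
     bx	r2
   falling back to "mov pc, r2" when BX is unavailable.  */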
13778 /* Output a 'call' insn that is a reference in memory. This is
13779 disabled for ARMv5 and we prefer a blx instead because otherwise
13780 there's a significant performance overhead. */
13782 output_call_mem (rtx *operands)
13784 gcc_assert (!arm_arch5);
13785 if (TARGET_INTERWORK)
13787 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13788 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13789 output_asm_insn ("bx%?\t%|ip", operands);
13791 else if (regno_use_in (LR_REGNUM, operands[0]))
13793 /* LR is used in the memory address. We load the address in the
13794 first instruction. It's safe to use IP as the target of the
13795 load since the call will kill it anyway. */
13796 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13797 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13799 output_asm_insn ("bx%?\t%|ip", operands);
13801 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13805 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13806 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13813 /* Output a move from ARM registers to an FPA register.
13814 OPERANDS[0] is an FPA register.
13815 OPERANDS[1] is the first register of an ARM register pair. */
13817 output_mov_long_double_fpa_from_arm (rtx *operands)
13819 int arm_reg0 = REGNO (operands[1]);
13822 gcc_assert (arm_reg0 != IP_REGNUM);
13824 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13825 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13826 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13828 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13829 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13834 /* Output a move from an FPA register to ARM registers.
13835 OPERANDS[0] is the first register of an ARM register pair.
13836 OPERANDS[1] is an FPA register. */
13838 output_mov_long_double_arm_from_fpa (rtx *operands)
13840 int arm_reg0 = REGNO (operands[0]);
13843 gcc_assert (arm_reg0 != IP_REGNUM);
13845 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13846 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13847 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13849 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13850 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13854 /* Output a move from ARM registers to ARM registers of a long double.
13855 OPERANDS[0] is the destination.
13856 OPERANDS[1] is the source. */
13858 output_mov_long_double_arm_from_arm (rtx *operands)
13860 /* We have to be careful here because the two might overlap. */
13861 int dest_start = REGNO (operands[0]);
13862 int src_start = REGNO (operands[1]);
13866 if (dest_start < src_start)
13868 for (i = 0; i < 3; i++)
13870 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13871 ops[1] = gen_rtx_REG (SImode, src_start + i);
13872 output_asm_insn ("mov%?\t%0, %1", ops);
13877 for (i = 2; i >= 0; i--)
13879 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13880 ops[1] = gen_rtx_REG (SImode, src_start + i);
13881 output_asm_insn ("mov%?\t%0, %1", ops);
13889 arm_emit_movpair (rtx dest, rtx src)
13891 /* If the src is an immediate, simplify it. */
13892 if (CONST_INT_P (src))
13894 HOST_WIDE_INT val = INTVAL (src);
13895 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13896 if ((val >> 16) & 0x0000ffff)
13897 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13899 GEN_INT ((val >> 16) & 0x0000ffff));
13902 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13903 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
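/* Editorial sketch: for the constant 0x12345678 this expands to a set of
   the low half followed by a ZERO_EXTRACT set of the top half, which on
   targets with MOVW/MOVT typically assembles as
     movw	rN, #0x5678
     movt	rN, #0x1234
   and collapses to the single MOVW when the top 16 bits are zero.  */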
13906 /* Output a move from ARM registers to an FPA register.
13907 OPERANDS[0] is an FPA register.
13908 OPERANDS[1] is the first register of an ARM register pair. */
13910 output_mov_double_fpa_from_arm (rtx *operands)
13912 int arm_reg0 = REGNO (operands[1]);
13915 gcc_assert (arm_reg0 != IP_REGNUM);
13917 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13918 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13919 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13920 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13924 /* Output a move from an FPA register to ARM registers.
13925 OPERANDS[0] is the first register of an ARM register pair.
13926 OPERANDS[1] is an FPA register. */
13928 output_mov_double_arm_from_fpa (rtx *operands)
13930 int arm_reg0 = REGNO (operands[0]);
13933 gcc_assert (arm_reg0 != IP_REGNUM);
13935 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13936 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13937 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13938 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
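/* Editorial example: with OPERANDS[0] == r0 and OPERANDS[1] == f0 the
   templates above produce
     stfd	f0, [sp, #-8]!
     ldmfd	sp!, {r0, r1}
   bouncing the value through the stack, there being no direct
   FPA-to-core register move in this sequence.  */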
13942 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
13945 output_move_double (rtx *operands, bool emit, int *count)
13947 enum rtx_code code0 = GET_CODE (operands[0]);
13948 enum rtx_code code1 = GET_CODE (operands[1]);
13953 /* The only case when this might happen is when
13954 you are looking at the length of a DImode instruction
13955 that has an invalid constant in it. */
13956 if (code0 == REG && code1 != MEM)
13958 gcc_assert (!emit);
13965 unsigned int reg0 = REGNO (operands[0]);
13967 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13969 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13971 switch (GET_CODE (XEXP (operands[1], 0)))
13978 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13979 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13981 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13986 gcc_assert (TARGET_LDRD);
13988 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13995 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13997 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14005 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14007 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14012 gcc_assert (TARGET_LDRD);
14014 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14019 /* Autoincrement addressing modes should never have overlapping
14020 base and destination registers, and overlapping index registers
14021 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
14023 otherops[0] = operands[0];
14024 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14025 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14027 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14029 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14031 /* Registers overlap so split out the increment. */
14034 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14035 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14042 /* Use a single insn if we can.
14043 FIXME: IWMMXT allows offsets larger than ldrd can
14044 handle, fix these up with a pair of ldr. */
14046 || GET_CODE (otherops[2]) != CONST_INT
14047 || (INTVAL (otherops[2]) > -256
14048 && INTVAL (otherops[2]) < 256))
14051 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14057 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14058 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14068 /* Use a single insn if we can.
14069 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14070 fix these up with a pair of ldr. */
14072 || GET_CODE (otherops[2]) != CONST_INT
14073 || (INTVAL (otherops[2]) > -256
14074 && INTVAL (otherops[2]) < 256))
14077 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14083 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14084 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14094 /* We might be able to use ldrd %0, %1 here. However the range is
14095 different to ldr/adr, and it is broken on some ARMv7-M
14096 implementations. */
14097 /* Use the second register of the pair to avoid problematic
14099 otherops[1] = operands[1];
14101 output_asm_insn ("adr%?\t%0, %1", otherops);
14102 operands[1] = otherops[0];
14106 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14108 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14115 /* ??? This needs checking for thumb2. */
14117 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14118 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14120 otherops[0] = operands[0];
14121 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14122 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14124 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14126 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14128 switch ((int) INTVAL (otherops[2]))
14132 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14138 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14144 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14148 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14149 operands[1] = otherops[0];
14151 && (GET_CODE (otherops[2]) == REG
14153 || (GET_CODE (otherops[2]) == CONST_INT
14154 && INTVAL (otherops[2]) > -256
14155 && INTVAL (otherops[2]) < 256)))
14157 if (reg_overlap_mentioned_p (operands[0],
14161 /* Swap base and index registers over to
14162 avoid a conflict. */
14164 otherops[1] = otherops[2];
14167 /* If both registers conflict, it will usually
14168 have been fixed by a splitter. */
14169 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14170 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14174 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14175 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14182 otherops[0] = operands[0];
14184 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14189 if (GET_CODE (otherops[2]) == CONST_INT)
14193 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14194 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14196 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14202 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14208 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14215 return "ldr%(d%)\t%0, [%1]";
14217 return "ldm%(ia%)\t%1, %M0";
14221 otherops[1] = adjust_address (operands[1], SImode, 4);
14222 /* Take care of overlapping base/data reg. */
14223 if (reg_mentioned_p (operands[0], operands[1]))
14227 output_asm_insn ("ldr%?\t%0, %1", otherops);
14228 output_asm_insn ("ldr%?\t%0, %1", operands);
14238 output_asm_insn ("ldr%?\t%0, %1", operands);
14239 output_asm_insn ("ldr%?\t%0, %1", otherops);
14249 /* Constraints should ensure this. */
14250 gcc_assert (code0 == MEM && code1 == REG);
14251 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14253 switch (GET_CODE (XEXP (operands[0], 0)))
14259 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14261 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14266 gcc_assert (TARGET_LDRD);
14268 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14275 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14277 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14285 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14287 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14292 gcc_assert (TARGET_LDRD);
14294 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14299 otherops[0] = operands[1];
14300 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14301 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14303 /* IWMMXT allows offsets larger than ldrd can handle,
14304 fix these up with a pair of ldr. */
14306 && GET_CODE (otherops[2]) == CONST_INT
14307 && (INTVAL(otherops[2]) <= -256
14308 || INTVAL(otherops[2]) >= 256))
14310 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14314 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14315 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14324 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14325 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14331 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14334 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14339 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14344 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14345 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14347 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14351 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14358 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14365 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14370 && (GET_CODE (otherops[2]) == REG
14372 || (GET_CODE (otherops[2]) == CONST_INT
14373 && INTVAL (otherops[2]) > -256
14374 && INTVAL (otherops[2]) < 256)))
14376 otherops[0] = operands[1];
14377 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14379 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14385 otherops[0] = adjust_address (operands[0], SImode, 4);
14386 otherops[1] = operands[1];
14389 output_asm_insn ("str%?\t%1, %0", operands);
14390 output_asm_insn ("str%?\t%H1, %0", otherops);
14400 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14401 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14404 output_move_quad (rtx *operands)
14406 if (REG_P (operands[0]))
14408 /* Load, or reg->reg move. */
14410 if (MEM_P (operands[1]))
14412 switch (GET_CODE (XEXP (operands[1], 0)))
14415 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14420 output_asm_insn ("adr%?\t%0, %1", operands);
14421 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14425 gcc_unreachable ();
14433 gcc_assert (REG_P (operands[1]));
14435 dest = REGNO (operands[0]);
14436 src = REGNO (operands[1]);
14438 /* This seems pretty dumb, but hopefully GCC won't try to do it too often. */
14441 for (i = 0; i < 4; i++)
14443 ops[0] = gen_rtx_REG (SImode, dest + i);
14444 ops[1] = gen_rtx_REG (SImode, src + i);
14445 output_asm_insn ("mov%?\t%0, %1", ops);
14448 for (i = 3; i >= 0; i--)
14450 ops[0] = gen_rtx_REG (SImode, dest + i);
14451 ops[1] = gen_rtx_REG (SImode, src + i);
14452 output_asm_insn ("mov%?\t%0, %1", ops);
14458 gcc_assert (MEM_P (operands[0]));
14459 gcc_assert (REG_P (operands[1]));
14460 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14462 switch (GET_CODE (XEXP (operands[0], 0)))
14465 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14469 gcc_unreachable ();
14476 /* Output a VFP load or store instruction. */
14479 output_move_vfp (rtx *operands)
14481 rtx reg, mem, addr, ops[2];
14482 int load = REG_P (operands[0]);
14483 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14484 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14487 enum machine_mode mode;
14489 reg = operands[!load];
14490 mem = operands[load];
14492 mode = GET_MODE (reg);
14494 gcc_assert (REG_P (reg));
14495 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14496 gcc_assert (mode == SFmode
14500 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14501 gcc_assert (MEM_P (mem));
14503 addr = XEXP (mem, 0);
14505 switch (GET_CODE (addr))
14508 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14509 ops[0] = XEXP (addr, 0);
14514 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14515 ops[0] = XEXP (addr, 0);
14520 templ = "f%s%c%%?\t%%%s0, %%1%s";
14526 sprintf (buff, templ,
14527 load ? "ld" : "st",
14530 integer_p ? "\t%@ int" : "");
14531 output_asm_insn (buff, ops);
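/* Editorial example: a DFmode load from [r3] into d7 takes the plain
   address arm of the switch above and prints
     fldd	d7, [r3]
   while an SFmode store through a POST_INC base would come out as
   something like "fstmias r2!, {s14}".  */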
14536 /* Output a Neon quad-word load or store, or a load or store for
14537 larger structure modes.
14539 WARNING: The ordering of elements is weird in big-endian mode,
14540 because we use VSTM, as required by the EABI. GCC RTL defines
14541 element ordering based on in-memory order. This can differ
14542 from the architectural ordering of elements within a NEON register.
14543 The intrinsics defined in arm_neon.h use the NEON register element
14544 ordering, not the GCC RTL element ordering.
14546 For example, the in-memory ordering of a big-endian quadword
14547 vector with 16-bit elements when stored from register pair {d0,d1}
14548 will be (lowest address first, d0[N] is NEON register element N):
14550 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14552 When necessary, quadword registers (dN, dN+1) are moved to ARM
14553 registers from rN in the order:
14555 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14557 So that STM/LDM can be used on vectors in ARM registers, and the
14558 same memory layout will result as if VSTM/VLDM were used. */
14561 output_move_neon (rtx *operands)
14563 rtx reg, mem, addr, ops[2];
14564 int regno, load = REG_P (operands[0]);
14567 enum machine_mode mode;
14569 reg = operands[!load];
14570 mem = operands[load];
14572 mode = GET_MODE (reg);
14574 gcc_assert (REG_P (reg));
14575 regno = REGNO (reg);
14576 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14577 || NEON_REGNO_OK_FOR_QUAD (regno));
14578 gcc_assert (VALID_NEON_DREG_MODE (mode)
14579 || VALID_NEON_QREG_MODE (mode)
14580 || VALID_NEON_STRUCT_MODE (mode));
14581 gcc_assert (MEM_P (mem));
14583 addr = XEXP (mem, 0);
14585 /* Strip off const from addresses like (const (plus (...))). */
14586 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14587 addr = XEXP (addr, 0);
14589 switch (GET_CODE (addr))
14592 templ = "v%smia%%?\t%%0!, %%h1";
14593 ops[0] = XEXP (addr, 0);
14598 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14599 templ = "v%smdb%%?\t%%0!, %%h1";
14600 ops[0] = XEXP (addr, 0);
14605 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14606 gcc_unreachable ();
14611 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14614 for (i = 0; i < nregs; i++)
14616 /* We're only using DImode here because it's a convenient size. */
14617 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14618 ops[1] = adjust_address (mem, DImode, 8 * i);
14619 if (reg_overlap_mentioned_p (ops[0], mem))
14621 gcc_assert (overlap == -1);
14626 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14627 output_asm_insn (buff, ops);
14632 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14633 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14634 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14635 output_asm_insn (buff, ops);
14642 templ = "v%smia%%?\t%%m0, %%h1";
14647 sprintf (buff, templ, load ? "ld" : "st");
14648 output_asm_insn (buff, ops);
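/* Editorial sketch: a quad-word load through a POST_INC base register
   uses the "v%smia" template above and would come out as something like
     vldmia	r0!, {d0-d1}
   whereas a LABEL_REF/PLUS address is broken into pairs of 64-bit VLDRs,
   with any register that overlaps the base register deferred until last.  */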
14653 /* Compute and return the length of neon_mov<mode>, where <mode> is
14654 one of VSTRUCT modes: EI, OI, CI or XI. */
14656 arm_attr_length_move_neon (rtx insn)
14658 rtx reg, mem, addr;
14660 enum machine_mode mode;
14662 extract_insn_cached (insn);
14664 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14666 mode = GET_MODE (recog_data.operand[0]);
14677 gcc_unreachable ();
14681 load = REG_P (recog_data.operand[0]);
14682 reg = recog_data.operand[!load];
14683 mem = recog_data.operand[load];
14685 gcc_assert (MEM_P (mem));
14687 mode = GET_MODE (reg);
14688 addr = XEXP (mem, 0);
14690 /* Strip off const from addresses like (const (plus (...))). */
14691 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14692 addr = XEXP (addr, 0);
14694 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14696 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14703 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
14707 arm_address_offset_is_imm (rtx insn)
14711 extract_insn_cached (insn);
14713 if (REG_P (recog_data.operand[0]))
14716 mem = recog_data.operand[0];
14718 gcc_assert (MEM_P (mem));
14720 addr = XEXP (mem, 0);
14722 if (GET_CODE (addr) == REG
14723 || (GET_CODE (addr) == PLUS
14724 && GET_CODE (XEXP (addr, 0)) == REG
14725 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14731 /* Output an ADD r, s, #n where n may be too big for one instruction.
14732 If adding zero to one register, output nothing. */
14734 output_add_immediate (rtx *operands)
14736 HOST_WIDE_INT n = INTVAL (operands[2]);
14738 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14741 output_multi_immediate (operands,
14742 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14745 output_multi_immediate (operands,
14746 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14753 /* Output a multiple immediate operation.
14754 OPERANDS is the vector of operands referred to in the output patterns.
14755 INSTR1 is the output pattern to use for the first constant.
14756 INSTR2 is the output pattern to use for subsequent constants.
14757 IMMED_OP is the index of the constant slot in OPERANDS.
14758 N is the constant value. */
14759 static const char *
14760 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14761 int immed_op, HOST_WIDE_INT n)
14763 #if HOST_BITS_PER_WIDE_INT > 32
14769 /* Quick and easy output. */
14770 operands[immed_op] = const0_rtx;
14771 output_asm_insn (instr1, operands);
14776 const char * instr = instr1;
14778 /* Note that n is never zero here (which would give no output). */
14779 for (i = 0; i < 32; i += 2)
14783 operands[immed_op] = GEN_INT (n & (255 << i));
14784 output_asm_insn (instr, operands);
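/* Editorial worked example: adding the constant 0x12345 is split into
   byte-sized, even-rotation chunks, giving
     add	rd, rs, #69		@ 0x45
     add	rd, rd, #8960		@ 0x2300
     add	rd, rd, #65536		@ 0x10000
   with INSTR1 used for the first chunk and INSTR2 thereafter.  */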
14794 /* Return the name of a shifter operation. */
14795 static const char *
14796 arm_shift_nmem (enum rtx_code code)
14801 return ARM_LSL_NAME;
14817 /* Return the appropriate ARM instruction for the operation code.
14818 The returned result should not be overwritten. OP is the rtx of the
14819 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was a shift. */
14822 arithmetic_instr (rtx op, int shift_first_arg)
14824 switch (GET_CODE (op))
14830 return shift_first_arg ? "rsb" : "sub";
14845 return arm_shift_nmem (GET_CODE (op));
14848 gcc_unreachable ();
14852 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14853 for the operation code. The returned result should not be overwritten.
14854 OP is the rtx code of the shift.
14855 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift. */
14857 static const char *
14858 shift_op (rtx op, HOST_WIDE_INT *amountp)
14861 enum rtx_code code = GET_CODE (op);
14863 switch (GET_CODE (XEXP (op, 1)))
14871 *amountp = INTVAL (XEXP (op, 1));
14875 gcc_unreachable ();
14881 gcc_assert (*amountp != -1);
14882 *amountp = 32 - *amountp;
14885 /* Fall through. */
14891 mnem = arm_shift_nmem (code);
14895 /* We never have to worry about the amount being other than a
14896 power of 2, since this case can never be reloaded from a reg. */
14897 gcc_assert (*amountp != -1);
14898 *amountp = int_log2 (*amountp);
14899 return ARM_LSL_NAME;
14902 gcc_unreachable ();
14905 if (*amountp != -1)
14907 /* This is not 100% correct, but follows from the desire to merge
14908 multiplication by a power of 2 with the recognizer for a
14909 shift. >=32 is not a valid shift for "lsl", so we must try and
14910 output a shift that produces the correct arithmetical result.
14911 Using lsr #32 is identical except for the fact that the carry bit
14912 is not set correctly if we set the flags; but we never use the
14913 carry bit from such an operation, so we can ignore that. */
14914 if (code == ROTATERT)
14915 /* Rotate is just modulo 32. */
14917 else if (*amountp != (*amountp & 31))
14919 if (code == ASHIFT)
14924 /* Shifts of 0 are no-ops. */
14932 /* Obtain the shift count from the POWER of two. */
14934 static HOST_WIDE_INT
14935 int_log2 (HOST_WIDE_INT power)
14937 HOST_WIDE_INT shift = 0;
14939 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14941 gcc_assert (shift <= 31);
14948 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14949 because /bin/as is horribly restrictive. The judgement about
14950 whether or not each character is 'printable' (and can be output as
14951 is) or not (and must be printed with an octal escape) must be made
14952 with reference to the *host* character set -- the situation is
14953 similar to that discussed in the comments above pp_c_char in
14954 c-pretty-print.c. */
14956 #define MAX_ASCII_LEN 51
14959 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14962 int len_so_far = 0;
14964 fputs ("\t.ascii\t\"", stream);
14966 for (i = 0; i < len; i++)
14970 if (len_so_far >= MAX_ASCII_LEN)
14972 fputs ("\"\n\t.ascii\t\"", stream);
14978 if (c == '\\' || c == '\"')
14980 putc ('\\', stream);
14988 fprintf (stream, "\\%03o", c);
14993 fputs ("\"\n", stream);
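/* Editorial example: output_ascii_pseudo_op (f, (const unsigned char *)
   "a\"b\n", 4) would print
     .ascii	"a\"b\012"
   quoting the '"' and emitting the newline as an octal escape; a string
   longer than MAX_ASCII_LEN is broken across several .ascii directives.  */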
14996 /* Compute the register save mask for registers 0 through 12
14997 inclusive. This code is used by arm_compute_save_reg_mask. */
14999 static unsigned long
15000 arm_compute_save_reg0_reg12_mask (void)
15002 unsigned long func_type = arm_current_func_type ();
15003 unsigned long save_reg_mask = 0;
15006 if (IS_INTERRUPT (func_type))
15008 unsigned int max_reg;
15009 /* Interrupt functions must not corrupt any registers,
15010 even call-clobbered ones. If this is a leaf function
15011 we can just examine the registers used by the RTL, but
15012 otherwise we have to assume that whatever function is
15013 called might clobber anything, and so we have to save
15014 all the call-clobbered registers as well. */
15015 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15016 /* FIQ handlers have registers r8 - r12 banked, so
15017 we only need to check r0 - r7. Normal ISRs only
15018 bank r14 and r15, so we must check up to r12.
15019 r13 is the stack pointer, which is always preserved,
15020 so we do not need to consider it here. */
15025 for (reg = 0; reg <= max_reg; reg++)
15026 if (df_regs_ever_live_p (reg)
15027 || (! current_function_is_leaf && call_used_regs[reg]))
15028 save_reg_mask |= (1 << reg);
15030 /* Also save the pic base register if necessary. */
15032 && !TARGET_SINGLE_PIC_BASE
15033 && arm_pic_register != INVALID_REGNUM
15034 && crtl->uses_pic_offset_table)
15035 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15037 else if (IS_VOLATILE (func_type))
15039 /* For noreturn functions we historically omitted register saves
15040 altogether. However this really messes up debugging. As a
15041 compromise, save just the frame pointers. Combined with the link
15042 register saved elsewhere this should be sufficient to get a backtrace. */
15044 if (frame_pointer_needed)
15045 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15046 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15047 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15048 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15049 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15053 /* In the normal case we only need to save those registers
15054 which are call saved and which are used by this function. */
15055 for (reg = 0; reg <= 11; reg++)
15056 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15057 save_reg_mask |= (1 << reg);
15059 /* Handle the frame pointer as a special case. */
15060 if (frame_pointer_needed)
15061 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15063 /* If we aren't loading the PIC register,
15064 don't stack it even though it may be live. */
15066 && !TARGET_SINGLE_PIC_BASE
15067 && arm_pic_register != INVALID_REGNUM
15068 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15069 || crtl->uses_pic_offset_table))
15070 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15072 /* The prologue will copy SP into R0, so save it. */
15073 if (IS_STACKALIGN (func_type))
15074 save_reg_mask |= 1;
15077 /* Save registers so the exception handler can modify them. */
15078 if (crtl->calls_eh_return)
15084 reg = EH_RETURN_DATA_REGNO (i);
15085 if (reg == INVALID_REGNUM)
15087 save_reg_mask |= 1 << reg;
15091 return save_reg_mask;
15095 /* Compute the number of bytes used to store the static chain register on the
15096 stack, above the stack frame. We need to know this accurately to get the
15097 alignment of the rest of the stack frame correct. */
15099 static int arm_compute_static_chain_stack_bytes (void)
15101 unsigned long func_type = arm_current_func_type ();
15102 int static_chain_stack_bytes = 0;
15104 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15105 && IS_NESTED (func_type)
15106 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15107 static_chain_stack_bytes = 4;
15109 return static_chain_stack_bytes;
15113 /* Compute a bit mask of which registers need to be
15114 saved on the stack for the current function.
15115 This is used by arm_get_frame_offsets, which may add extra registers. */
15117 static unsigned long
15118 arm_compute_save_reg_mask (void)
15120 unsigned int save_reg_mask = 0;
15121 unsigned long func_type = arm_current_func_type ();
15124 if (IS_NAKED (func_type))
15125 /* This should never really happen. */
15128 /* If we are creating a stack frame, then we must save the frame pointer,
15129 IP (which will hold the old stack pointer), LR and the PC. */
15130 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15132 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15135 | (1 << PC_REGNUM);
15137 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15139 /* Decide if we need to save the link register.
15140 Interrupt routines have their own banked link register,
15141 so they never need to save it.
15142 Otherwise, if we do not use the link register, we do not need to save
15143 it. If we are pushing other registers onto the stack, however, we
15144 can save an instruction in the epilogue by pushing the link register
15145 now and then popping it back into the PC. This incurs extra memory
15146 accesses though, so we only do it when optimizing for size, and only
15147 if we know that we will not need a fancy return sequence. */
15148 if (df_regs_ever_live_p (LR_REGNUM)
15151 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15152 && !crtl->calls_eh_return))
15153 save_reg_mask |= 1 << LR_REGNUM;
15155 if (cfun->machine->lr_save_eliminated)
15156 save_reg_mask &= ~ (1 << LR_REGNUM);
15158 if (TARGET_REALLY_IWMMXT
15159 && ((bit_count (save_reg_mask)
15160 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15161 arm_compute_static_chain_stack_bytes())
15164 /* The total number of registers that are going to be pushed
15165 onto the stack is odd. We need to ensure that the stack
15166 is 64-bit aligned before we start to save iWMMXt registers,
15167 and also before we start to create locals. (A local variable
15168 might be a double or long long which we will load/store using
15169 an iWMMXt instruction). Therefore we need to push another
15170 ARM register, so that the stack will be 64-bit aligned. We
15171 try to avoid using the arg registers (r0 - r3) as they might be
15172 used to pass values in a tail call. */
15173 for (reg = 4; reg <= 12; reg++)
15174 if ((save_reg_mask & (1 << reg)) == 0)
15178 save_reg_mask |= (1 << reg);
15181 cfun->machine->sibcall_blocked = 1;
15182 save_reg_mask |= (1 << 3);
15186 /* We may need to push an additional register for use initializing the
15187 PIC base register. */
15188 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15189 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15191 reg = thumb_find_work_register (1 << 4);
15192 if (!call_used_regs[reg])
15193 save_reg_mask |= (1 << reg);
15196 return save_reg_mask;
15200 /* Compute a bit mask of which registers need to be
15201 saved on the stack for the current function. */
15202 static unsigned long
15203 thumb1_compute_save_reg_mask (void)
15205 unsigned long mask;
15209 for (reg = 0; reg < 12; reg ++)
15210 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15214 && !TARGET_SINGLE_PIC_BASE
15215 && arm_pic_register != INVALID_REGNUM
15216 && crtl->uses_pic_offset_table)
15217 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15219 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15220 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15221 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15223 /* LR will also be pushed if any lo regs are pushed. */
15224 if (mask & 0xff || thumb_force_lr_save ())
15225 mask |= (1 << LR_REGNUM);
15227 /* Make sure we have a low work register if we need one.
15228 We will need one if we are going to push a high register,
15229 but we are not currently intending to push a low register. */
15230 if ((mask & 0xff) == 0
15231 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15233 /* Use thumb_find_work_register to choose which register
15234 we will use. If the register is live then we will
15235 have to push it. Use LAST_LO_REGNUM as our fallback
15236 choice for the register to select. */
15237 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15238 /* Make sure the register returned by thumb_find_work_register is
15239 not part of the return value. */
15240 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15241 reg = LAST_LO_REGNUM;
15243 if (! call_used_regs[reg])
15247 /* The 504 below is 8 bytes less than 512 because there are two possible
15248 alignment words. We can't tell here if they will be present or not so we
15249 have to play it safe and assume that they are. */
15250 if ((CALLER_INTERWORKING_SLOT_SIZE +
15251 ROUND_UP_WORD (get_frame_size ()) +
15252 crtl->outgoing_args_size) >= 504)
15254 /* This is the same as the code in thumb1_expand_prologue() which
15255 determines which register to use for stack decrement. */
15256 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15257 if (mask & (1 << reg))
15260 if (reg > LAST_LO_REGNUM)
15262 /* Make sure we have a register available for stack decrement. */
15263 mask |= 1 << LAST_LO_REGNUM;
15271 /* Return the number of bytes required to save VFP registers. */
15273 arm_get_vfp_saved_size (void)
15275 unsigned int regno;
15280 /* Space for saved VFP registers. */
15281 if (TARGET_HARD_FLOAT && TARGET_VFP)
15284 for (regno = FIRST_VFP_REGNUM;
15285 regno < LAST_VFP_REGNUM;
15288 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15289 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15293 /* Work around the ARM10 VFPr1 bug. */
15294 if (count == 2 && !arm_arch6)
15296 saved += count * 8;
15305 if (count == 2 && !arm_arch6)
15307 saved += count * 8;
15314 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15315 everything bar the final return instruction. */
15317 output_return_instruction (rtx operand, int really_return, int reverse)
15319 char conditional[10];
15322 unsigned long live_regs_mask;
15323 unsigned long func_type;
15324 arm_stack_offsets *offsets;
15326 func_type = arm_current_func_type ();
15328 if (IS_NAKED (func_type))
15331 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15333 /* If this function was declared non-returning, and we have
15334 found a tail call, then we have to trust that the called
15335 function won't return. */
15340 /* Otherwise, trap an attempted return by aborting. */
15342 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15344 assemble_external_libcall (ops[1]);
15345 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15351 gcc_assert (!cfun->calls_alloca || really_return);
15353 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15355 cfun->machine->return_used_this_function = 1;
15357 offsets = arm_get_frame_offsets ();
15358 live_regs_mask = offsets->saved_regs_mask;
15360 if (live_regs_mask)
15362 const char * return_reg;
15364 /* If we do not have any special requirements for function exit
15365 (e.g. interworking) then we can load the return address
15366 directly into the PC. Otherwise we must load it into LR. */
15368 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15369 return_reg = reg_names[PC_REGNUM];
15371 return_reg = reg_names[LR_REGNUM];
15373 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15375 /* There are three possible reasons for the IP register
15376 being saved. 1) a stack frame was created, in which case
15377 IP contains the old stack pointer, or 2) an ISR routine
15378 corrupted it, or 3) it was saved to align the stack on
15379 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
15381 if (frame_pointer_needed)
15383 live_regs_mask &= ~ (1 << IP_REGNUM);
15384 live_regs_mask |= (1 << SP_REGNUM);
15387 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15390 /* On some ARM architectures it is faster to use LDR rather than
15391 LDM to load a single register. On other architectures, the
15392 cost is the same. In 26-bit mode, or for exception handlers,
15393 we have to use LDM to load the PC so that the CPSR is also restored. */
15395 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15396 if (live_regs_mask == (1U << reg))
15399 if (reg <= LAST_ARM_REGNUM
15400 && (reg != LR_REGNUM
15402 || ! IS_INTERRUPT (func_type)))
15404 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15405 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15412 /* Generate the load multiple instruction to restore the
15413 registers. Note we can get here, even if
15414 frame_pointer_needed is true, but only if sp already
15415 points to the base of the saved core registers. */
15416 if (live_regs_mask & (1 << SP_REGNUM))
15418 unsigned HOST_WIDE_INT stack_adjust;
15420 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15421 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15423 if (stack_adjust && arm_arch5 && TARGET_ARM)
15424 if (TARGET_UNIFIED_ASM)
15425 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15427 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15430 /* If we can't use ldmib (SA110 bug),
15431 then try to pop r3 instead. */
15433 live_regs_mask |= 1 << 3;
15435 if (TARGET_UNIFIED_ASM)
15436 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15438 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15442 if (TARGET_UNIFIED_ASM)
15443 sprintf (instr, "pop%s\t{", conditional);
15445 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15447 p = instr + strlen (instr);
15449 for (reg = 0; reg <= SP_REGNUM; reg++)
15450 if (live_regs_mask & (1 << reg))
15452 int l = strlen (reg_names[reg]);
15458 memcpy (p, ", ", 2);
15462 memcpy (p, "%|", 2);
15463 memcpy (p + 2, reg_names[reg], l);
15467 if (live_regs_mask & (1 << LR_REGNUM))
15469 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15470 /* If returning from an interrupt, restore the CPSR. */
15471 if (IS_INTERRUPT (func_type))
15478 output_asm_insn (instr, & operand);
15480 /* See if we need to generate an extra instruction to
15481 perform the actual function return. */
15483 && func_type != ARM_FT_INTERWORKED
15484 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15486 /* The return has already been handled
15487 by loading the LR into the PC. */
15494 switch ((int) ARM_FUNC_TYPE (func_type))
15498 /* ??? This is wrong for unified assembly syntax. */
15499 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15502 case ARM_FT_INTERWORKED:
15503 sprintf (instr, "bx%s\t%%|lr", conditional);
15506 case ARM_FT_EXCEPTION:
15507 /* ??? This is wrong for unified assembly syntax. */
15508 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15512 /* Use bx if it's available. */
15513 if (arm_arch5 || arm_arch4t)
15514 sprintf (instr, "bx%s\t%%|lr", conditional);
15516 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15520 output_asm_insn (instr, & operand);
15526 /* Write the function name into the code section, directly preceding
15527 the function prologue.
15529 Code will be output similar to this:
15531 .ascii "arm_poke_function_name", 0
15534 .word 0xff000000 + (t1 - t0)
15535 arm_poke_function_name
15537 stmfd sp!, {fp, ip, lr, pc}
15540 When performing a stack backtrace, code can inspect the value
15541 of 'pc' stored at 'fp' + 0. If the trace function then looks
15542 at location pc - 12 and the top 8 bits are set, then we know
15543 that there is a function name embedded immediately preceding this
15544 location, whose length is ((pc[-3]) & ~0xff000000).
15546 We assume that pc is declared as a pointer to an unsigned long.
15548 It is of no benefit to output the function name if we are assembling
15549 a leaf function. These function types will not contain a stack
15550 backtrace structure, therefore it is not possible to determine the function name. */
15553 arm_poke_function_name (FILE *stream, const char *name)
15555 unsigned long alignlength;
15556 unsigned long length;
15559 length = strlen (name) + 1;
15560 alignlength = ROUND_UP_WORD (length);
15562 ASM_OUTPUT_ASCII (stream, name, length);
15563 ASM_OUTPUT_ALIGN (stream, 2);
15564 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15565 assemble_aligned_integer (UNITS_PER_WORD, x);
15568 /* Place some comments into the assembler stream
15569 describing the current function. */
15571 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15573 unsigned long func_type;
15575 /* ??? Do we want to print some of the below anyway? */
15579 /* Sanity check. */
15580 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15582 func_type = arm_current_func_type ();
15584 switch ((int) ARM_FUNC_TYPE (func_type))
15587 case ARM_FT_NORMAL:
15589 case ARM_FT_INTERWORKED:
15590 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15593 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15596 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15598 case ARM_FT_EXCEPTION:
15599 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15603 if (IS_NAKED (func_type))
15604 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15606 if (IS_VOLATILE (func_type))
15607 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15609 if (IS_NESTED (func_type))
15610 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15611 if (IS_STACKALIGN (func_type))
15612 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15614 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15616 crtl->args.pretend_args_size, frame_size);
15618 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15619 frame_pointer_needed,
15620 cfun->machine->uses_anonymous_args);
15622 if (cfun->machine->lr_save_eliminated)
15623 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15625 if (crtl->calls_eh_return)
15626 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15631 arm_output_epilogue (rtx sibling)
15634 unsigned long saved_regs_mask;
15635 unsigned long func_type;
15636 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15637 frame that is $fp + 4 for a non-variadic function. */
15638 int floats_offset = 0;
15640 FILE * f = asm_out_file;
15641 unsigned int lrm_count = 0;
15642 int really_return = (sibling == NULL);
15644 arm_stack_offsets *offsets;
15646 /* If we have already generated the return instruction
15647 then it is futile to generate anything else. */
15648 if (use_return_insn (FALSE, sibling)
15649 && (cfun->machine->return_used_this_function != 0))
15652 func_type = arm_current_func_type ();
15654 if (IS_NAKED (func_type))
15655 /* Naked functions don't have epilogues. */
15658 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15662 /* A volatile function should never return. Call abort. */
15663 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15664 assemble_external_libcall (op);
15665 output_asm_insn ("bl\t%a0", &op);
15670 /* If we are throwing an exception, then we really must be doing a
15671 return, so we can't tail-call. */
15672 gcc_assert (!crtl->calls_eh_return || really_return);
15674 offsets = arm_get_frame_offsets ();
15675 saved_regs_mask = offsets->saved_regs_mask;
  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);
15680 floats_offset = offsets->saved_args;
15681 /* Compute how far away the floats will be. */
15682 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15683 if (saved_regs_mask & (1 << reg))
15684 floats_offset += 4;
15686 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15688 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15689 int vfp_offset = offsets->frame;
15691 if (TARGET_FPA_EMU2)
15693 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15694 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15696 floats_offset += 12;
15697 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15698 reg, FP_REGNUM, floats_offset - vfp_offset);
15703 start_reg = LAST_FPA_REGNUM;
15705 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15707 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15709 floats_offset += 12;
15711 /* We can't unstack more than four registers at once. */
15712 if (start_reg - reg == 3)
15714 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15715 reg, FP_REGNUM, floats_offset - vfp_offset);
15716 start_reg = reg - 1;
15721 if (reg != start_reg)
15722 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15723 reg + 1, start_reg - reg,
15724 FP_REGNUM, floats_offset - vfp_offset);
15725 start_reg = reg - 1;
15729 /* Just in case the last register checked also needs unstacking. */
15730 if (reg != start_reg)
15731 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15732 reg + 1, start_reg - reg,
15733 FP_REGNUM, floats_offset - vfp_offset);
15736 if (TARGET_HARD_FLOAT && TARGET_VFP)
15740 /* The fldmd insns do not have base+offset addressing
15741 modes, so we use IP to hold the address. */
	{
	  int saved_size;

	  saved_size = arm_get_vfp_saved_size ();
15744 if (saved_size > 0)
15746 floats_offset += saved_size;
15747 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15748 FP_REGNUM, floats_offset - vfp_offset);
15750 start_reg = FIRST_VFP_REGNUM;
15751 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15753 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15754 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15756 if (start_reg != reg)
15757 vfp_output_fldmd (f, IP_REGNUM,
15758 (start_reg - FIRST_VFP_REGNUM) / 2,
15759 (reg - start_reg) / 2);
15760 start_reg = reg + 2;
15763 if (start_reg != reg)
15764 vfp_output_fldmd (f, IP_REGNUM,
15765 (start_reg - FIRST_VFP_REGNUM) / 2,
15766 (reg - start_reg) / 2);
      if (TARGET_IWMMXT)
	{
	  /* The frame pointer is guaranteed to be non-double-word aligned.
15772 This is because it is set to (old_stack_pointer - 4) and the
15773 old_stack_pointer was double word aligned. Thus the offset to
15774 the iWMMXt registers to be loaded must also be non-double-word
15775 sized, so that the resultant address *is* double-word aligned.
15776 We can ignore floats_offset since that was already included in
15777 the live_regs_mask. */
15778 lrm_count += (lrm_count % 2 ? 2 : 1);
15780 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15781 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      {
		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, lrm_count * 4);
		lrm_count += 2;
	      }
	}
15789 /* saved_regs_mask should contain the IP, which at the time of stack
15790 frame generation actually contains the old stack pointer. So a
15791 quick way to unwind the stack is just pop the IP register directly
15792 into the stack pointer. */
15793 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15794 saved_regs_mask &= ~ (1 << IP_REGNUM);
15795 saved_regs_mask |= (1 << SP_REGNUM);
15797 /* There are two registers left in saved_regs_mask - LR and PC. We
15798 only need to restore the LR register (the return address), but to
15799 save time we can load it directly into the PC, unless we need a
15800 special function exit sequence, or we are not really returning. */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return)
15803 && !crtl->calls_eh_return)
15804 /* Delete the LR from the register mask, so that the LR on
15805 the stack is loaded into the PC in the register mask. */
15806 saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);
15810 /* We must use SP as the base register, because SP is one of the
15811 registers being restored. If an interrupt or page fault
15812 happens in the ldm instruction, the SP might or might not
15813 have been restored. That would be bad, as then SP will no
15814 longer indicate the safe area of stack, and we can get stack
15815 corruption. Using SP as the base register means that it will
15816 be reset correctly to the original value, should an interrupt
15817 occur. If the stack pointer already points at the right
15818 place, then omit the subtraction. */
15819 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15820 || cfun->calls_alloca)
15821 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15822 4 * bit_count (saved_regs_mask));
15823 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
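      /* Illustrative sketch (not from the original sources): for an APCS
	 frame that saved {fp, ip, lr, pc} in the prologue, the mask
	 rewriting and sub/ldmfd pair above typically come out as

		sub	sp, fp, #12
		ldmfd	sp, {fp, sp, pc}

	 where loading the saved IP value into SP restores the caller's
	 stack pointer in the same instruction that returns.  */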
15825 if (IS_INTERRUPT (func_type))
15826 /* Interrupt handlers will have pushed the
15827 IP onto the stack, so restore it now. */
15828 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15832 /* This branch is executed for ARM mode (non-apcs frames) and
15833 Thumb-2 mode. Frame layout is essentially the same for those
15834 cases, except that in ARM mode frame pointer points to the
15835 first saved register, while in Thumb-2 mode the frame pointer points
15836 to the last saved register.
15838 It is possible to make frame pointer point to last saved
15839 register in both cases, and remove some conditionals below.
15840 That means that fp setup in prologue would be just "mov fp, sp"
15841 and sp restore in epilogue would be just "mov sp, fp", whereas
15842 now we have to use add/sub in those cases. However, the value
15843 of that would be marginal, as both mov and add/sub are 32-bit
15844 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish ARM-apcs-frame case
15846 (where frame pointer is required to point at first register)
15847 and ARM-non-apcs-frame. Therefore, such change is postponed
	 until a real need arises.  */
15849 unsigned HOST_WIDE_INT amount;
15851 /* Restore stack pointer if necessary. */
15852 if (TARGET_ARM && frame_pointer_needed)
15854 operands[0] = stack_pointer_rtx;
15855 operands[1] = hard_frame_pointer_rtx;
15857 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15858 output_add_immediate (operands);
15862 if (frame_pointer_needed)
	  /* For Thumb-2 restore sp from the frame pointer.
	     Operand restrictions mean we have to increment FP, then copy
	     it into SP.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  operands[0] = hard_frame_pointer_rtx;
	}
      else
	{
15872 unsigned long count;
15873 operands[0] = stack_pointer_rtx;
15874 amount = offsets->outgoing_args - offsets->saved_regs;
	  /* Pop call-clobbered registers if it avoids a
	     separate stack adjustment.  */
15877 count = offsets->saved_regs - offsets->saved_args;
	  if (optimize_size
	      && count != 0
	      && !crtl->calls_eh_return
15881 && bit_count(saved_regs_mask) * 4 == count
15882 && !IS_INTERRUPT (func_type)
15883 && !IS_STACKALIGN (func_type)
15884 && !crtl->tail_call_emit)
15886 unsigned long mask;
15887 /* Preserve return values, of any size. */
15888 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
	      mask &= ~saved_regs_mask;

	      while (bit_count (mask) * 4 > amount)
		{
		  reg = 0;
		  while ((mask & (1 << reg)) == 0)
		    reg++;
		  mask &= ~(1 << reg);
		}

	      if (bit_count (mask) * 4 == amount)
		{
		  amount = 0;
		  saved_regs_mask |= mask;
		}
	  if (amount)
	    {
	      operands[1] = operands[0];
	      operands[2] = GEN_INT (amount);
	      output_add_immediate (operands);
	    }
15911 if (frame_pointer_needed)
15912 asm_fprintf (f, "\tmov\t%r, %r\n",
15913 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
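      /* Illustrative sketch (not from the original sources): on Thumb-2
	 with the frame pointer in r7 the two steps above might read

		add	r7, r7, #8
		mov	sp, r7

	 stepping SP back to the register-save area before the register
	 pops that follow.  */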
15916 if (TARGET_FPA_EMU2)
15918 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15919 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			 reg, SP_REGNUM);
15925 start_reg = FIRST_FPA_REGNUM;
15927 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15929 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15931 if (reg - start_reg == 3)
15933 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15934 start_reg, SP_REGNUM);
15935 start_reg = reg + 1;
15940 if (reg != start_reg)
15941 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
			 start_reg, reg - start_reg,
			 SP_REGNUM);
15945 start_reg = reg + 1;
15949 /* Just in case the last register checked also needs unstacking. */
15950 if (reg != start_reg)
15951 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15952 start_reg, reg - start_reg, SP_REGNUM);
15955 if (TARGET_HARD_FLOAT && TARGET_VFP)
15957 int end_reg = LAST_VFP_REGNUM + 1;
15959 /* Scan the registers in reverse order. We need to match
	     any groupings made in the prologue and generate matching
	     pop operations.  */
15962 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15964 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15965 && (!df_regs_ever_live_p (reg + 1)
15966 || call_used_regs[reg + 1]))
15968 if (end_reg > reg + 2)
15969 vfp_output_fldmd (f, SP_REGNUM,
15970 (reg + 2 - FIRST_VFP_REGNUM) / 2,
				      (end_reg - (reg + 2)) / 2);
		  end_reg = reg;
		}
	    }
15975 if (end_reg > reg + 2)
15976 vfp_output_fldmd (f, SP_REGNUM, 0,
15977 (end_reg - (reg + 2)) / 2);
      if (TARGET_IWMMXT)
	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15982 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15983 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15985 /* If we can, restore the LR into the PC. */
15986 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15987 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15988 && !IS_STACKALIGN (func_type)
      && really_return
      && crtl->args.pretend_args_size == 0
15991 && saved_regs_mask & (1 << LR_REGNUM)
15992 && !crtl->calls_eh_return)
    {
      saved_regs_mask &= ~ (1 << LR_REGNUM);
      saved_regs_mask |= (1 << PC_REGNUM);
      rfe = IS_INTERRUPT (func_type);
    }
  else
    rfe = 0;
16001 /* Load the registers off the stack. If we only have one register
16002 to load use the LDR instruction - it is faster. For Thumb-2
     always use pop and the assembler will pick the best instruction.  */
16004 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
16005 && !IS_INTERRUPT(func_type))
16007 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
16009 else if (saved_regs_mask)
16011 if (saved_regs_mask & (1 << SP_REGNUM))
16012 /* Note - write back to the stack register is not enabled
16013 (i.e. "ldmfd sp!..."). We know that the stack pointer is
16014 in the list of registers and if we add writeback the
16015 instruction becomes UNPREDICTABLE. */
	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else if (TARGET_ARM)
	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else
	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
16025 if (crtl->args.pretend_args_size)
16027 /* Unwind the pre-pushed regs. */
16028 operands[0] = operands[1] = stack_pointer_rtx;
16029 operands[2] = GEN_INT (crtl->args.pretend_args_size);
16030 output_add_immediate (operands);
16034 /* We may have already restored PC directly from the stack. */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";
16038 /* Stack adjustment for exception handler. */
16039 if (crtl->calls_eh_return)
16040 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16041 ARM_EH_STACKADJ_REGNUM);
16043 /* Generate the return instruction. */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;
    default:
      if (IS_STACKALIGN (func_type))
16062 /* See comment in arm_expand_prologue. */
16063 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
16065 if (arm_arch5 || arm_arch4t)
16066 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
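/* Illustrative sketch (not from the original sources): the return
   emitted above depends on both function type and architecture, e.g.

	subs	pc, lr, #4	@ ISR/FIQ: undo the pipeline adjustment
	movs	pc, lr		@ exception handler
	bx	lr		@ interworking, or ARMv4T/ARMv5 and later
	mov	pc, lr		@ pre-v4t fallback

   with the flag-setting forms also restoring SPSR to CPSR.  */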
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16077 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16079 arm_stack_offsets *offsets;
  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
16087 for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
16094 targetm.asm_out.internal_label (asm_out_file, "L",
16095 CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}
16100 /* ??? Probably not safe to set this here, since it assumes that a
16101 function will be emitted as assembly immediately after we generate
16102 RTL for it. This does not happen for inline functions. */
      cfun->machine->return_used_this_function = 0;
    }
16105 else /* TARGET_32BIT */
16107 /* We need to take into account any stack-frame rounding. */
16108 offsets = arm_get_frame_offsets ();
16110 gcc_assert (!use_return_insn (FALSE, NULL)
16111 || (cfun->machine->return_used_this_function != 0)
16112 || offsets->saved_regs == offsets->outgoing_args
16113 || frame_pointer_needed);
16115 /* Reset the ARM-specific per-function variables. */
  after_arm_reorg = 0;
}
16120 /* Generate and emit an insn that we will recognize as a push_multi.
16121 Unfortunately, since this insn does not reflect very well the actual
16122 semantics of the operation, we need to annotate the insn for the benefit
16123 of DWARF2 frame unwind information. */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;
16135 for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;
16139 gcc_assert (num_regs && num_regs <= 16);
16141 /* We don't record the PC in the dwarf frame information. */
16142 num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;
16146 /* For the body of the insn we are going to generate an UNSPEC in
16147 parallel with several USEs. This allows the insn to be recognized
16148 by the push_multi pattern in the arm.md file.
     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
	                                (const_int:SI <num>)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
	  ...
        ])
16161 For the frame note however, we try to be more explicit and actually
16162 show each register being stored into the stack frame, plus a (single)
16163 decrement of the stack pointer. We do it this way in order to be
16164 friendly to the stack unwinding code, which only wants to see a single
16165 stack decrement per instruction. The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	  ...
        ])
     FIXME: In an ideal world the PRE_MODIFY would not exist and
16177 instead we'd have a parallel expression detailing all
16178 the stores to the various memory addresses so that debug
16179 information is more up-to-date. Remember however while writing
16180 this to take care of the constraints with the push instruction.
16182 Note also that this has to be taken care of for the VFP registers.
16184 For more see PR43399. */
16186 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16187 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16188 dwarf_par_index = 1;
16190 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16192 if (mask & (1 << i))
16194 reg = gen_rtx_REG (SImode, i);
	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));
16210 if (i != PC_REGNUM)
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }
16224 for (j = 1, i++; j < num_regs; i++)
16226 if (mask & (1 << i))
16228 reg = gen_rtx_REG (SImode, i);
16230 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }
16249 par = emit_insn (par);
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
16254 RTX_FRAME_RELATED_P (tmp) = 1;
16255 XVECEXP (dwarf, 0, 0) = tmp;
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
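/* Illustrative sketch (not from the original sources): for
   mask == (1 << 4) | (1 << 5) | (1 << LR_REGNUM) the PARALLEL built
   above matches the push_multi pattern and assembles to

	stmfd	sp!, {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note re-describes it as a
   single sp = sp - 12 adjustment plus three individual stores, which
   is the shape the unwinder wants to see.  */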
16262 /* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
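/* Illustrative note (not from the original sources): a function
   returning DImode yields 8 here, so callers such as the r3-reuse
   heuristic in arm_get_frame_offsets (arm_size_return_regs () <= 12)
   admit results spread over up to three core registers.  */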
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;
16284 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16285 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16287 reg = gen_rtx_REG (XFmode, base_reg++);
  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(stack_pointer_rtx,
					 -12 * count))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
16302 tmp = gen_rtx_SET (VOIDmode,
16303 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16304 RTX_FRAME_RELATED_P (tmp) = 1;
16305 XVECEXP (dwarf, 0, 1) = tmp;
16307 for (i = 1; i < count; i++)
16309 reg = gen_rtx_REG (XFmode, base_reg++);
16310 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
16325 RTX_FRAME_RELATED_P (tmp) = 1;
16326 XVECEXP (dwarf, 0, 0) = tmp;
16328 par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
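/* Illustrative note (not from the original sources): emit_sfm builds
   the FPA analogue of the core-register push above, one store-multiple
   of COUNT XFmode registers starting at BASE_REG, annotated with a
   single 12 * COUNT byte SP decrement plus the individual stores.  */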
16335 /* Return true if the current function needs to save/restore LR. */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
16347 /* Return true if r3 is used by any of the tail call insns in the
16348 current function. */
static bool
any_sibcall_uses_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
16358 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16359 if (e->flags & EDGE_SIBCALL)
16361 rtx call = BB_END (e->src);
16362 if (!CALL_P (call))
16363 call = prev_nonnote_nondebug_insn (call);
16364 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3))
	  return true;
      }

  return false;
}
16372 /* Compute the distance from register FROM to register TO.
16373 These can be the arg pointer (26), the soft frame pointer (25),
16374 the stack pointer (13) or the hard frame pointer (11).
16375 In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
16406 For a given function some or all of these stack components
16407 may not be needed, giving rise to the possibility of
16408 eliminating some of the registers.
16410 The values returned by this function must reflect the behavior
16411 of arm_expand_prologue() and arm_compute_save_reg_mask().
16413 The sign of the number returned reflects the direction of stack
16414 growth, so the values are positive for all eliminations except
16415 from the soft frame pointer to the hard frame pointer.
   SFP may point just inside the local variables block to ensure correct
   alignment.  */
16421 /* Calculate stack offsets. These are used to calculate register elimination
16422 offsets and in prologue/epilogue code. Also calculates which registers
16423 should be saved. */
16425 static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;
16436 offsets = &cfun->machine->stack_offsets;
16438 /* We need to know if we are a leaf function. Unfortunately, it
16439 is possible to be called after start_sequence has been called,
16440 which causes get_insns to return the insns for the sequence,
16441 not the function, which will cause leaf_function_p to return
16442 the incorrect result.
     To work around this, we cache the computed frame size.  This
     works because we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
16445 frame size cannot be changed after that time, so we can safely
16446 use the cached value. */
  if (reload_completed)
    return offsets;
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());
16455 leaf = leaf_function_p ();
16457 /* Space for variadic functions. */
16458 offsets->saved_args = crtl->args.pretend_args_size;
16460 /* In Thumb mode this is incorrect, but never used. */
16461 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16462 arm_compute_static_chain_stack_bytes();
  if (TARGET_32BIT)
    {
      unsigned int regno;
16468 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16469 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16470 saved = core_saved;
16472 /* We know that SP will be doubleword aligned on entry, and we must
16473 preserve that condition at any subroutine call. We also require the
16474 soft frame pointer to be doubleword aligned. */
16476 if (TARGET_REALLY_IWMMXT)
16478 /* Check for the call-saved iWMMXt registers. */
16479 for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}
16486 func_type = arm_current_func_type ();
16487 if (! IS_VOLATILE (func_type))
16489 /* Space for saved FPA registers. */
16490 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;
16494 /* Space for saved VFP registers. */
16495 if (TARGET_HARD_FLOAT && TARGET_VFP)
16496 saved += arm_get_vfp_saved_size ();
16499 else /* TARGET_THUMB1 */
16501 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16502 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16503 saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }
16508 /* Saved registers include the stack frame. */
16509 offsets->saved_regs = offsets->saved_args + saved +
16510 arm_compute_static_chain_stack_bytes();
16511 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
16515 /* However if it calls alloca(), we have a dynamically allocated
16516 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16517 && ! cfun->calls_alloca)
16519 offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
16524 /* Ensure SFP has the correct alignment. */
16525 if (ARM_DOUBLEWORD_ALIGN
16526 && (offsets->soft_frame & 7))
16528 offsets->soft_frame += 4;
16529 /* Try to align stack by pushing an extra reg. Don't bother doing this
16530 when there is a stack frame as the alignment will be rolled into
16531 the normal stack adjustment. */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_uses_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
	    reg = 3;
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      if ((offsets->saved_regs_mask & (1 << i)) == 0)
		{
		  reg = i;
		  break;
		}

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }
16563 offsets->locals_base = offsets->soft_frame + frame_size;
16564 offsets->outgoing_args = (offsets->locals_base
16565 + crtl->outgoing_args_size);
16567 if (ARM_DOUBLEWORD_ALIGN)
16569 /* Ensure SP remains doubleword aligned. */
16570 if (offsets->outgoing_args & 7)
16571 offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
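/* Illustrative sketch (not from the original sources): for an ARM-mode
   function with 8 bytes of locals that saves {r4, r5, lr}, needs no
   frame pointer and no outgoing arguments, the computation above gives
   roughly saved_args = 0, saved_regs = 12, soft_frame = 12 (padded to
   16 under ARM_DOUBLEWORD_ALIGN), locals_base = 24 and
   outgoing_args = 24.  */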
16579 /* Calculate the relative offsets for the different stack pointers. Positive
16580 offsets are in the direction of stack growth. */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;
16587 offsets = arm_get_frame_offsets ();
16589 /* OK, now we have enough information to compute the distances.
16590 There must be an entry in these switch tables for each pair
16591 of registers in ELIMINABLE_REGS, even if some of the entries
16592 seem to be redundant or useless. */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;
16601 case FRAME_POINTER_REGNUM:
16602 /* This is the reverse of the soft frame pointer
16603 to hard frame pointer elimination below. */
16604 return offsets->soft_frame - offsets->saved_args;
16606 case ARM_HARD_FRAME_POINTER_REGNUM:
16607 /* This is only non-zero in the case where the static chain register
16608 is stored above the frame. */
16609 return offsets->frame - offsets->saved_args - 4;
16611 case STACK_POINTER_REGNUM:
16612 /* If nothing has been pushed on the stack at all
16613 then this will return -4. This *is* correct! */
16614 return offsets->outgoing_args - (offsets->saved_args + 4);
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();
    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;
16627 case ARM_HARD_FRAME_POINTER_REGNUM:
16628 /* The hard frame pointer points to the top entry in the
16629 stack frame. The soft frame pointer to the bottom entry
16630 in the stack frame. If there is no stack frame at all,
16631 then they are identical. */
16633 return offsets->frame - offsets->soft_frame;
16635 case STACK_POINTER_REGNUM:
16636 return offsets->outgoing_args - offsets->soft_frame;
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();
    default:
      /* You cannot eliminate from the stack pointer.
16645 In theory you could eliminate from the hard frame
16646 pointer to the stack pointer, but this will never
16647 happen, since if a stack frame is not needed the
16648 hard frame pointer will never be used. */
      gcc_unreachable ();
    }
}
16653 /* Given FROM and TO register numbers, say whether this elimination is
16654 allowed. Frame pointer elimination is automatically handled.
16656 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16657 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16658 pointer, we must eliminate FRAME_POINTER_REGNUM into
16659 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16660 ARG_POINTER_REGNUM. */
static bool
arm_can_eliminate (const int from, const int to)
{
16665 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16666 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16667 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
16672 /* Emit RTL to save coprocessor registers on function entry. Returns the
16673 number of bytes pushed. */
static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;
16683 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16684 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }
  /* Save any floating point call-saved registers used by this
     function.  */
16695 if (TARGET_FPA_EMU2)
16697 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16698 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	{
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (XFmode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 12;
	}
16709 start_reg = LAST_FPA_REGNUM;
16711 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16713 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16715 if (start_reg - reg == 3)
	      {
		insn = emit_sfm (reg, 4);
		RTX_FRAME_RELATED_P (insn) = 1;
		saved_size += 48;

		start_reg = reg - 1;
	      }
16725 if (start_reg != reg)
16727 insn = emit_sfm (reg + 1, start_reg - reg);
16728 RTX_FRAME_RELATED_P (insn) = 1;
16729 saved_size += (start_reg - reg) * 12;
16731 start_reg = reg - 1;
16735 if (start_reg != reg)
16737 insn = emit_sfm (reg + 1, start_reg - reg);
16738 saved_size += (start_reg - reg) * 12;
16739 RTX_FRAME_RELATED_P (insn) = 1;
16742 if (TARGET_HARD_FLOAT && TARGET_VFP)
16744 start_reg = FIRST_VFP_REGNUM;
16746 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16748 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16749 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16751 if (start_reg != reg)
16752 saved_size += vfp_emit_fstmd (start_reg,
16753 (reg - start_reg) / 2);
16754 start_reg = reg + 2;
16757 if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
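/* Illustrative sketch (not from the original sources): if d8-d10 are
   the only live call-saved VFP registers, the scan above reaches
   vfp_emit_fstmd once with a three-register run and emits a single
   store-multiple of 24 bytes, which becomes this function's
   contribution to the saved size.  */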
16765 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;
16773 amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16776 stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16780 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16781 expects the first two operands to be the same. */
      if (TARGET_THUMB2)
	insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				      stack_pointer_rtx,
				      hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				      hard_frame_pointer_rtx,
				      stack_pointer_rtx));
16794 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16795 plus_constant (stack_pointer_rtx, amount));
16796 RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
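/* Illustrative note (not from the original sources): for a 16-byte
   distance between the locals base and the outgoing-argument area this
   emits a single "add r7, sp, #16"; amounts of 1024 or more take the
   move-then-add path above, with the REG_FRAME_RELATED_EXPR note still
   describing the simple sp + amount form for the unwinder.  */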
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
16815 int saved_regs = 0;
16816 unsigned HOST_WIDE_INT args_to_push;
16817 arm_stack_offsets *offsets;
16819 func_type = arm_current_func_type ();
16821 /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))
    return;
16825 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16826 args_to_push = crtl->args.pretend_args_size;
16828 /* Compute which register we will have to save onto the stack. */
16829 offsets = arm_get_frame_offsets ();
16830 live_regs_mask = offsets->saved_regs_mask;
16832 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16851 r0 = gen_rtx_REG (SImode, 0);
16852 r1 = gen_rtx_REG (SImode, 1);
16854 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16855 RTX_FRAME_RELATED_P (insn) = 1;
16856 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16858 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16860 /* ??? The CFA changes here, which may cause GDB to conclude that it
16861 has entered a different function. That said, the unwind info is
16862 correct, individually, before and after this instruction because
16863 we've described the save of SP, which will override the default
16864 handling of SP as restoring from the CFA. */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
16873 if (IS_INTERRUPT (func_type))
16875 /* Interrupt functions must not corrupt any registers.
16876 Creating a frame pointer however, corrupts the IP
16877 register, so we must push it first. */
16878 emit_multi_reg_push (1 << IP_REGNUM);
16880 /* Do not set RTX_FRAME_RELATED_P on this insn.
16881 The dwarf stack unwinding code only wants to see one
16882 stack decrement per function, and this is not it. If
16883 this instruction is labeled as being part of the frame
16884 creation sequence then dwarf2out_frame_debug_expr will
16885 die when it encounters the assignment of IP to FP
16886 later on, since the use of SP here establishes SP as
16887 the CFA register and not IP.
16889 Anyway this instruction is not really part of the stack
16890 frame creation although it is part of the prologue. */
16892 else if (IS_NESTED (func_type))
16894 /* The Static chain register is the same as the IP register
16895 used as a scratch register during stack frame creation.
16896 To get around this need to find somewhere to store IP
16897 whilst the frame is being created. We try the following
16900 1. The last argument register.
16901 2. A slot on the stack above the frame. (This only
16902 works if the function is not a varargs function).
16903 3. Register r3, after pushing the argument registers
16906 Note - we only need to tell the dwarf2 backend about the SP
16907 adjustment in the second variant; the static chain register
16908 doesn't need to be unwound, as it doesn't contain a value
16909 inherited from the caller. */
	  if (df_regs_ever_live_p (3) == false)
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
16953 insn = emit_set_insn (ip_rtx,
16954 plus_constant (stack_pointer_rtx, fp_offset));
16955 RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
16961 if (cfun->machine->uses_anonymous_args)
16962 insn = emit_multi_reg_push
16963 ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16967 GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     then subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
16976 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16977 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16978 && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }
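  /* Illustrative note (not from the original sources): an IRQ or FIQ
     handler enters with LR four bytes beyond the proper return address.
     Folding the -4 into LR before it is pushed lets the epilogue return
     with an ordinary pop into PC rather than a "subs pc, lr, #4".  */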
  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
17013 if (! IS_VOLATILE (func_type))
17014 saved_regs += arm_save_coproc_regs ();
17016 if (frame_pointer_needed && TARGET_ARM)
17018 /* Create the new frame pointer. */
17019 if (TARGET_APCS_FRAME)
17021 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17022 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17023 RTX_FRAME_RELATED_P (insn) = 1;
17025 if (IS_NESTED (func_type))
17027 /* Recover the static chain register. */
17028 if (!df_regs_ever_live_p (3)
17029 || saved_pretend_args)
17030 insn = gen_rtx_REG (SImode, 3);
17031 else /* if (crtl->args.pretend_args_size == 0) */
	    {
	      insn = plus_constant (hard_frame_pointer_rtx, 4);
	      insn = gen_frame_mem (SImode, insn);
	    }

	  emit_set_insn (ip_rtx, insn);
17037 /* Add a USE to stop propagate_one_insn() from barfing. */
17038 emit_insn (gen_prologue_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
17044 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17045 stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
17050 if (flag_stack_usage_info)
17051 current_function_static_stack_size
17052 = offsets->outgoing_args - offsets->saved_args;
17054 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17056 /* This add can produce multiple insns for a large constant, so we
17057 need to get tricky. */
17058 rtx last = get_last_insn ();
17060 amount = GEN_INT (offsets->saved_args + saved_regs
17061 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
17072 /* If the frame pointer is needed, emit a special barrier that
17073 will prevent the scheduler from moving stores to the frame
17074 before the stack adjustment. */
17075 if (frame_pointer_needed)
17076 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17077 hard_frame_pointer_rtx));
17081 if (frame_pointer_needed && TARGET_THUMB2)
17082 thumb_set_frame_pointer (offsets);
17084 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17086 unsigned long mask;
17088 mask = live_regs_mask;
17089 mask &= THUMB2_WORK_REGS;
17090 if (!IS_NESTED (func_type))
17091 mask |= (1 << IP_REGNUM);
17092 arm_load_pic_register (mask);
17095 /* If we are profiling, make sure no instructions are scheduled before
17096 the call to mcount. Similarly if the user has requested no
17097 scheduling in the prolog. Similarly if we want non-call exceptions
17098 using the EABI unwinder, to prevent faulting instructions from being
17099 swapped with a stack adjustment. */
17100 if (crtl->profile || !TARGET_SCHED_PROLOG
17101 || (arm_except_unwind_info (&global_options) == UI_TARGET
17102 && cfun->can_throw_non_call_exceptions))
17103 emit_insn (gen_blockage ());
17105 /* If the link register is being kept alive, with the return address in it,
17106 then make sure that it does not get reused by the ce2 pass. */
17107 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
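/* Illustrative sketch (not from the original sources): for a nested,
   non-variadic function under TARGET_APCS_FRAME the code above emits
   the classic sequence

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   which is exactly the layout arm_output_epilogue unwinds.  */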
17111 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
17115 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17117 /* Branch conversion is not implemented for Thumb-2. */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
17123 if (current_insn_predicate != NULL)
17125 output_operand_lossage
17126 ("predicated instruction in conditional sequence");
17130 fputs (arm_condition_codes[arm_current_cc], stream);
17132 else if (current_insn_predicate)
17134 enum arm_cond_code code;
17138 output_operand_lossage ("predicated Thumb instruction");
17142 code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
17148 /* If CODE is 'd', then the X is a condition operand and the instruction
17149 should only be executed if the condition is true.
17150 if CODE is 'D', then the X is a condition operand and the instruction
17151 should only be executed if the condition is false: however, if the mode
17152 of the comparison is CCFPEmode, then always execute the instruction -- we
17153 do this because in these circumstances !GE does not necessarily imply LT;
17154 in these cases the instruction pattern will take care to make sure that
17155 an instruction containing %d will follow, thereby undoing the effects of
17156 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
17160 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;
    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;
    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;
    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;
17219 /* %# is a "break" sequence. It doesn't output anything, but is used to
17220 separate e.g. operand numbers from following text, if that text consists
17221 of further digits which we don't want to be part of the operand
    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = real_value_negate (&r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;
    case 'B':
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;
    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;
17281 fprintf (stream, "%s", arithmetic_instr (x, 1));
    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
	  return;
	}
      arm_print_operand (stream, x, 0);
      return;
17295 fprintf (stream, "%s", arithmetic_instr (x, 0));
    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	if (!shift_operator (x, SImode))
	  {
	    output_operand_lossage ("invalid shift operand");
	    break;
	  }

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
17322 /* An explanation of the 'Q', 'R' and 'H' register operands:
17324 In a pair of registers containing a DI or DF value the 'Q'
17325 operand returns the register number of the register containing
17326 the least significant part of the value. The 'R' operand returns
17327 the register number of the register containing the most
17328 significant part of the value.
17330 The 'H' operand returns the higher of the two register numbers.
17331 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17332 same as the 'Q' operand, since the most significant part of the
17333 value is held in the lower number register. The reverse is true
17334 on systems where WORDS_BIG_ENDIAN is false.
17336 The purpose of these operands is to distinguish between cases
17337 where the endian-ness of the values is important (for example
17338 when they are added together), and cases where the endian-ness
17339 is irrelevant, but the order of register operations is important.
17340 For example when loading a value from memory into a register
17341 pair, the endian-ness does not matter. Provided that the value
17342 from the lower memory address is put into the lower numbered
17343 register, and the value from the higher address is put into the
17344 higher numbered register, the load will work regardless of whether
17345 the value being loaded is big-wordian or little-wordian. The
17346 order of the two register loads can matter however, if the address
17347 of the memory location is actually held in one of the registers
17348 being overwritten by the load.
       The 'Q' and 'R' constraints are also available for 64-bit
       constants.  */
    case 'Q':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;
    case 'R':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	{
	  enum machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;
    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;
17402 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17404 output_operand_lossage ("invalid operand for code '%c'", code);
17408 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17412 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17414 output_operand_lossage ("invalid operand for code '%c'", code);
17418 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17422 asm_fprintf (stream, "%r",
17423 GET_CODE (XEXP (x, 0)) == REG
17424 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17428 asm_fprintf (stream, "{%r-%r}",
17430 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;
    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;
    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;
17480 /* Cirrus registers can be accessed in a variety of ways:
17481 single floating point (f)
17482 double floating point (d)
	   32bit integer         (fx)
	   64bit integer         (dx).  */
17485 case 'W': /* Cirrus register in F mode. */
17486 case 'X': /* Cirrus register in D mode. */
17487 case 'Y': /* Cirrus register in FX mode. */
17488 case 'Z': /* Cirrus register in DX mode. */
17489 gcc_assert (GET_CODE (x) == REG
17490 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17492 fprintf (stream, "mv%s%s",
17494 : code == 'X' ? "d"
17495 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17499 /* Print cirrus register in the mode specified by the register's mode. */
17502 int mode = GET_MODE (x);
17504 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17506 output_operand_lossage ("invalid operand for code '%c'", code);
17510 fprintf (stream, "mv%s%s",
17511 mode == DFmode ? "d"
17512 : mode == SImode ? "fx"
17513 : mode == DImode ? "dx"
17514 : "f", reg_names[REGNO (x)] + 2);
17520 if (GET_CODE (x) != REG
17521 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17522 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17523 /* Bad value for wCG register number. */
17525 output_operand_lossage ("invalid operand for code '%c'", code);
17530 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17533 /* Print an iWMMXt control register name. */
17535 if (GET_CODE (x) != CONST_INT
17537 || INTVAL (x) >= 16)
17538 /* Bad value for wC register number. */
17540 output_operand_lossage ("invalid operand for code '%c'", code);
17546 static const char * wc_reg_names [16] =
17548 "wCID", "wCon", "wCSSF", "wCASF",
17549 "wC4", "wC5", "wC6", "wC7",
17550 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17551 "wC12", "wC13", "wC14", "wC15"
17554 fprintf (stream, wc_reg_names [INTVAL (x)]);
17558 /* Print the high single-precision register of a VFP double-precision
17562 int mode = GET_MODE (x);
17565 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17567 output_operand_lossage ("invalid operand for code '%c'", code);
17572 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17574 output_operand_lossage ("invalid operand for code '%c'", code);
17578 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17582 /* Print a VFP/Neon double precision or quad precision register name. */
17586 int mode = GET_MODE (x);
17587 int is_quad = (code == 'q');
17590 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17592 output_operand_lossage ("invalid operand for code '%c'", code);
17596 if (GET_CODE (x) != REG
17597 || !IS_VFP_REGNUM (REGNO (x)))
17599 output_operand_lossage ("invalid operand for code '%c'", code);
17604 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17605 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17607 output_operand_lossage ("invalid operand for code '%c'", code);
17611 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17612 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	int mode = GET_MODE (x);
	int regno;
17625 if ((GET_MODE_SIZE (mode) != 16
17626 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17628 output_operand_lossage ("invalid operand for code '%c'", code);
	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
17635 output_operand_lossage ("invalid operand for code '%c'", code);
17639 if (GET_MODE_SIZE (mode) == 16)
17640 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17641 + (code == 'f' ? 1 : 0));
17643 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17644 + (code == 'f' ? 1 : 0));
17648 /* Print a VFPv3 floating-point constant, represented as an integer
17652 int index = vfp3_const_double_index (x);
17653 gcc_assert (index != -1);
17654 fprintf (stream, "%d", index);
17658 /* Print bits representing opcode features for Neon.
17660 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17661 and polynomials as unsigned.
17663 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17665 Bit 2 is 1 for rounding functions, 0 otherwise. */
17667 /* Identify the type as 's', 'u', 'p' or 'f'. */
17670 HOST_WIDE_INT bits = INTVAL (x);
17671 fputc ("uspf"[bits & 3], stream);
17675 /* Likewise, but signed and unsigned integers are both 'i'. */
17678 HOST_WIDE_INT bits = INTVAL (x);
17679 fputc ("iipf"[bits & 3], stream);
17683 /* As for 'T', but emit 'u' instead of 'p'. */
17686 HOST_WIDE_INT bits = INTVAL (x);
17687 fputc ("usuf"[bits & 3], stream);
17691 /* Bit 2: rounding (vs none). */
17694 HOST_WIDE_INT bits = INTVAL (x);
17695 fputs ((bits & 4) != 0 ? "r" : "", stream);
    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	unsigned align, memsize, align_bits;

	gcc_assert (GET_CODE (x) == MEM);
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
      }
      return;
17745 gcc_assert (GET_CODE (x) == MEM);
17746 addr = XEXP (x, 0);
17747 gcc_assert (GET_CODE (addr) == REG);
17748 asm_fprintf (stream, "[%r]", REGNO (addr));
17752 /* Translate an S register number into a D register number and element index. */
17755 int mode = GET_MODE (x);
17758 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17760 output_operand_lossage ("invalid operand for code '%c'", code);
17765 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17767 output_operand_lossage ("invalid operand for code '%c'", code);
17771 regno = regno - FIRST_VFP_REGNUM;
17772 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17777 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17778 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17781 /* Register specifier for vld1.16/vst1.16. Translate the S register
17782 number into a D register number and element index. */
17785 int mode = GET_MODE (x);
17788 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17790 output_operand_lossage ("invalid operand for code '%c'", code);
17795 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17797 output_operand_lossage ("invalid operand for code '%c'", code);
17801 regno = regno - FIRST_VFP_REGNUM;
17802 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17809 output_operand_lossage ("missing operand");
17813 switch (GET_CODE (x))
17816 asm_fprintf (stream, "%r", REGNO (x));
17820 output_memory_reference_mode = GET_MODE (x);
17821 output_address (XEXP (x, 0));
17828 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17829 sizeof (fpstr), 0, 1);
17830 fprintf (stream, "#%s", fpstr);
17833 fprintf (stream, "#%s", fp_immediate_constant (x));
17837 gcc_assert (GET_CODE (x) != NEG);
17838 fputc ('#', stream);
17839 if (GET_CODE (x) == HIGH)
17841 fputs (":lower16:", stream);
17845 output_addr_const (stream, x);
17851 /* Target hook for printing a memory address. */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;
17859 if (GET_CODE (x) == REG)
17860 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17861 else if (GET_CODE (x) == PLUS || is_minus)
17863 rtx base = XEXP (x, 0);
17864 rtx index = XEXP (x, 1);
17865 HOST_WIDE_INT offset = 0;
17866 if (GET_CODE (base) != REG
17867 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      rtx temp = base;
	      base = index;
	      index = temp;
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
17910 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17911 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17913 extern enum machine_mode output_memory_reference_mode;
17915 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17917 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17918 asm_fprintf (stream, "[%r, #%s%d]!",
17919 REGNO (XEXP (x, 0)),
17920 GET_CODE (x) == PRE_DEC ? "-" : "",
17921 GET_MODE_SIZE (output_memory_reference_mode));
17923 asm_fprintf (stream, "[%r], #%s%d",
17924 REGNO (XEXP (x, 0)),
17925 GET_CODE (x) == POST_DEC ? "-" : "",
17926 GET_MODE_SIZE (output_memory_reference_mode));
17928 else if (GET_CODE (x) == PRE_MODIFY)
17930 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17931 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17932 asm_fprintf (stream, "#%wd]!",
17933 INTVAL (XEXP (XEXP (x, 1), 1)));
17935 asm_fprintf (stream, "%r]!",
17936 REGNO (XEXP (XEXP (x, 1), 1)));
17938 else if (GET_CODE (x) == POST_MODIFY)
17940 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17941 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17942 asm_fprintf (stream, "#%wd",
17943 INTVAL (XEXP (XEXP (x, 1), 1)));
17945 asm_fprintf (stream, "%r",
17946 REGNO (XEXP (XEXP (x, 1), 1)));
17948 else output_addr_const (stream, x);
17952 if (GET_CODE (x) == REG)
17953 asm_fprintf (stream, "[%r]", REGNO (x));
17954 else if (GET_CODE (x) == POST_INC)
17955 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17956 else if (GET_CODE (x) == PLUS)
17958 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17959 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17960 asm_fprintf (stream, "[%r, #%wd]",
17961 REGNO (XEXP (x, 0)),
17962 INTVAL (XEXP (x, 1)));
17964 asm_fprintf (stream, "[%r, %r]",
17965 REGNO (XEXP (x, 0)),
17966 REGNO (XEXP (x, 1)));
17969 output_addr_const (stream, x);
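/* A summary of the output forms (added for illustration, not
   exhaustive): the first branch above emits addresses such as
   [r3, #0], [r2, #-8], [r1, r0], [r1, r0, lsl #2] (via the 'S'
   modifier), [r4, #4]! for PRE_MODIFY and [r4], #4 for POST_MODIFY,
   while the second branch is limited to [r3], [r3, #12], [r3, r2]
   and the post-increment form r3!. */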
17973 /* Target hook for indicating whether a punctuation character for
17974 TARGET_PRINT_OPERAND is valid. */
17976 arm_print_operand_punct_valid_p (unsigned char code)
17978 return (code == '@' || code == '|' || code == '.'
17979 || code == '(' || code == ')' || code == '#'
17980 || (TARGET_32BIT && (code == '?'))
17981 || (TARGET_THUMB2 && (code == '!'))
17982 || (TARGET_THUMB && (code == '_')));
17985 /* Target hook for assembling integer objects. The ARM version needs to
17986 handle word-sized values specially. */
17988 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17990 enum machine_mode mode;
17992 if (size == UNITS_PER_WORD && aligned_p)
17994 fputs ("\t.word\t", asm_out_file);
17995 output_addr_const (asm_out_file, x);
17997 /* Mark symbols as position independent. We only do this in the
17998 .text segment, not in the .data segment. */
17999 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
18000 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18002 /* See legitimize_pic_address for an explanation of the
18003 TARGET_VXWORKS_RTP check. */
18004 if (TARGET_VXWORKS_RTP
18005 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18006 fputs ("(GOT)", asm_out_file);
18008 fputs ("(GOTOFF)", asm_out_file);
18010 fputc ('\n', asm_out_file);
18014 mode = GET_MODE (x);
18016 if (arm_vector_mode_supported_p (mode))
18020 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18022 units = CONST_VECTOR_NUNITS (x);
18023 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18025 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18026 for (i = 0; i < units; i++)
18028 rtx elt = CONST_VECTOR_ELT (x, i);
18030 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18033 for (i = 0; i < units; i++)
18035 rtx elt = CONST_VECTOR_ELT (x, i);
18036 REAL_VALUE_TYPE rval;
18038 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18041 (rval, GET_MODE_INNER (mode),
18042 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18048 return default_assemble_integer (x, size, aligned_p);
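/* Example output of the word-sized case above (illustrative): in a PIC
   constant table the address of a non-local symbol is annotated for the
   GOT, while a local one uses the cheaper offset form, giving directives
   roughly like

       .word	foo(GOT)
       .word	.LANCHOR0(GOTOFF)  */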
18052 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18056 if (!TARGET_AAPCS_BASED)
18059 default_named_section_asm_out_constructor
18060 : default_named_section_asm_out_destructor) (symbol, priority);
18064 /* Put these in the .init_array section, using a special relocation. */
18065 if (priority != DEFAULT_INIT_PRIORITY)
18068 sprintf (buf, "%s.%.5u",
18069 is_ctor ? ".init_array" : ".fini_array",
18071 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18078 switch_to_section (s);
18079 assemble_align (POINTER_SIZE);
18080 fputs ("\t.word\t", asm_out_file);
18081 output_addr_const (asm_out_file, symbol);
18082 fputs ("(target1)\n", asm_out_file);
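/* For a constructor with priority 101 this emits something like
   (illustration; "my_ctor" is a placeholder symbol):

       .section	.init_array.00101
       .align	2
       .word	my_ctor(target1)

   The "%.5u" above zero-pads the priority so that the linker's lexical
   sort of the section names matches numeric priority order. */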
18085 /* Add a function to the list of static constructors. */
18088 arm_elf_asm_constructor (rtx symbol, int priority)
18090 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18093 /* Add a function to the list of static destructors. */
18096 arm_elf_asm_destructor (rtx symbol, int priority)
18098 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18101 /* A finite state machine takes care of noticing whether or not instructions
18102 can be conditionally executed, thus decreasing execution time and code
18103 size by deleting branch instructions. The fsm is controlled by
18104 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18106 /* The states of the fsm controlling condition codes are:
18107 0: normal, do nothing special
18108 1: make ASM_OUTPUT_OPCODE not output this instruction
18109 2: make ASM_OUTPUT_OPCODE not output this instruction
18110 3: make instructions conditional
18111 4: make instructions conditional
18113 State transitions (state->state by whom under condition):
18114 0 -> 1 final_prescan_insn if the `target' is a label
18115 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18116 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18117 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18118 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18119 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18120 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18121 (the target insn is arm_target_insn).
18123 If the jump clobbers the conditions then we use states 2 and 4.
18125 A similar thing can be done with conditional return insns.
18127 XXX In case the `target' is an unconditional branch, this conditionalising
18128 of the instructions always reduces code size, but not always execution
18129 time. But then, I want to reduce the code size to somewhere near what
18130 /bin/cc produces. */
18132 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18133 instructions. When a COND_EXEC instruction is seen the subsequent
18134 instructions are scanned so that multiple conditional instructions can be
18135 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18136 specify the length and true/false mask for the IT block. These will be
18137 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
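/* To make the FSM concrete, consider this sketch (ours, not from the
   source):

       cmp	r0, #0
       beq	.L1		@ state 0 -> 1: branch over one insn
       add	r1, r1, #1	@ suppressed branch conditionalizes this
   .L1:				@ state 3 -> 0 at the target label

   i.e. the "beq" is never emitted and the skipped instruction is
   output as "addne r1, r1, #1" instead. */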
18139 /* Returns the index of the ARM condition code string in
18140 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18141 COMPARISON should be an rtx like `(eq (...) (...))'. */
18144 maybe_get_arm_condition_code (rtx comparison)
18146 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18147 enum arm_cond_code code;
18148 enum rtx_code comp_code = GET_CODE (comparison);
18150 if (GET_MODE_CLASS (mode) != MODE_CC)
18151 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18152 XEXP (comparison, 1));
18156 case CC_DNEmode: code = ARM_NE; goto dominance;
18157 case CC_DEQmode: code = ARM_EQ; goto dominance;
18158 case CC_DGEmode: code = ARM_GE; goto dominance;
18159 case CC_DGTmode: code = ARM_GT; goto dominance;
18160 case CC_DLEmode: code = ARM_LE; goto dominance;
18161 case CC_DLTmode: code = ARM_LT; goto dominance;
18162 case CC_DGEUmode: code = ARM_CS; goto dominance;
18163 case CC_DGTUmode: code = ARM_HI; goto dominance;
18164 case CC_DLEUmode: code = ARM_LS; goto dominance;
18165 case CC_DLTUmode: code = ARM_CC;
18168 if (comp_code == EQ)
18169 return ARM_INVERSE_CONDITION_CODE (code);
18170 if (comp_code == NE)
18177 case NE: return ARM_NE;
18178 case EQ: return ARM_EQ;
18179 case GE: return ARM_PL;
18180 case LT: return ARM_MI;
18181 default: return ARM_NV;
18187 case NE: return ARM_NE;
18188 case EQ: return ARM_EQ;
18189 default: return ARM_NV;
18195 case NE: return ARM_MI;
18196 case EQ: return ARM_PL;
18197 default: return ARM_NV;
18202 /* These encodings assume that AC=1 in the FPA system control
18203 byte. This allows us to handle all cases except UNEQ and
18207 case GE: return ARM_GE;
18208 case GT: return ARM_GT;
18209 case LE: return ARM_LS;
18210 case LT: return ARM_MI;
18211 case NE: return ARM_NE;
18212 case EQ: return ARM_EQ;
18213 case ORDERED: return ARM_VC;
18214 case UNORDERED: return ARM_VS;
18215 case UNLT: return ARM_LT;
18216 case UNLE: return ARM_LE;
18217 case UNGT: return ARM_HI;
18218 case UNGE: return ARM_PL;
18219 /* UNEQ and LTGT do not have a representation. */
18220 case UNEQ: /* Fall through. */
18221 case LTGT: /* Fall through. */
18222 default: return ARM_NV;
18228 case NE: return ARM_NE;
18229 case EQ: return ARM_EQ;
18230 case GE: return ARM_LE;
18231 case GT: return ARM_LT;
18232 case LE: return ARM_GE;
18233 case LT: return ARM_GT;
18234 case GEU: return ARM_LS;
18235 case GTU: return ARM_CC;
18236 case LEU: return ARM_CS;
18237 case LTU: return ARM_HI;
18238 default: return ARM_NV;
18244 case LTU: return ARM_CS;
18245 case GEU: return ARM_CC;
18246 default: return ARM_NV;
18252 case NE: return ARM_NE;
18253 case EQ: return ARM_EQ;
18254 case GEU: return ARM_CS;
18255 case GTU: return ARM_HI;
18256 case LEU: return ARM_LS;
18257 case LTU: return ARM_CC;
18258 default: return ARM_NV;
18264 case GE: return ARM_GE;
18265 case LT: return ARM_LT;
18266 case GEU: return ARM_CS;
18267 case LTU: return ARM_CC;
18268 default: return ARM_NV;
18274 case NE: return ARM_NE;
18275 case EQ: return ARM_EQ;
18276 case GE: return ARM_GE;
18277 case GT: return ARM_GT;
18278 case LE: return ARM_LE;
18279 case LT: return ARM_LT;
18280 case GEU: return ARM_CS;
18281 case GTU: return ARM_HI;
18282 case LEU: return ARM_LS;
18283 case LTU: return ARM_CC;
18284 default: return ARM_NV;
18287 default: gcc_unreachable ();
18291 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18292 static enum arm_cond_code
18293 get_arm_condition_code (rtx comparison)
18295 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18296 gcc_assert (code != ARM_NV);
18300 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18303 thumb2_final_prescan_insn (rtx insn)
18305 rtx first_insn = insn;
18306 rtx body = PATTERN (insn);
18308 enum arm_cond_code code;
18312 /* Remove the previous insn from the count of insns to be output. */
18313 if (arm_condexec_count)
18314 arm_condexec_count--;
18316 /* Nothing to do if we are already inside a conditional block. */
18317 if (arm_condexec_count)
18320 if (GET_CODE (body) != COND_EXEC)
18323 /* Conditional jumps are implemented directly. */
18324 if (GET_CODE (insn) == JUMP_INSN)
18327 predicate = COND_EXEC_TEST (body);
18328 arm_current_cc = get_arm_condition_code (predicate);
18330 n = get_attr_ce_count (insn);
18331 arm_condexec_count = 1;
18332 arm_condexec_mask = (1 << n) - 1;
18333 arm_condexec_masklen = n;
18334 /* See if subsequent instructions can be combined into the same block. */
18337 insn = next_nonnote_insn (insn);
18339 /* Jumping into the middle of an IT block is illegal, so a label or
18340 barrier terminates the block. */
18341 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
18344 body = PATTERN (insn);
18345 /* USE and CLOBBER aren't really insns, so just skip them. */
18346 if (GET_CODE (body) == USE
18347 || GET_CODE (body) == CLOBBER)
18350 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18351 if (GET_CODE (body) != COND_EXEC)
18353 /* Allow up to 4 conditionally executed instructions in a block. */
18354 n = get_attr_ce_count (insn);
18355 if (arm_condexec_masklen + n > 4)
18358 predicate = COND_EXEC_TEST (body);
18359 code = get_arm_condition_code (predicate);
18360 mask = (1 << n) - 1;
18361 if (arm_current_cc == code)
18362 arm_condexec_mask |= (mask << arm_condexec_masklen);
18363 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18366 arm_condexec_count++;
18367 arm_condexec_masklen += n;
18369 /* A jump must be the last instruction in a conditional block. */
18370 if (GET_CODE (insn) == JUMP_INSN)
18373 /* Restore recog_data (getting the attributes of other insns can
18374 destroy this array, but final.c assumes that it remains intact
18375 across this call). */
18376 extract_constrain_insn_cached (first_insn);
18380 arm_final_prescan_insn (rtx insn)
18382 /* BODY will hold the body of INSN. */
18383 rtx body = PATTERN (insn);
18385 /* This will be 1 when trying to repeat the trick; things need to be
18386 reversed if the attempt appears to fail. */
18389 /* If we start with a return insn, we only succeed if we find another one. */
18390 int seeking_return = 0;
18391 enum rtx_code return_code = UNKNOWN;
18393 /* START_INSN will hold the insn from where we start looking. This is the
18394 first insn after the following code_label if REVERSE is true. */
18395 rtx start_insn = insn;
18397 /* If in state 4, check if the target branch is reached, in order to
18398 change back to state 0. */
18399 if (arm_ccfsm_state == 4)
18401 if (insn == arm_target_insn)
18403 arm_target_insn = NULL;
18404 arm_ccfsm_state = 0;
18409 /* If in state 3, it is possible to repeat the trick, if this insn is an
18410 unconditional branch to a label, and immediately following this branch
18411 is the previous target label which is only used once, and the label this
18412 branch jumps to is not too far off. */
18413 if (arm_ccfsm_state == 3)
18415 if (simplejump_p (insn))
18417 start_insn = next_nonnote_insn (start_insn);
18418 if (GET_CODE (start_insn) == BARRIER)
18420 /* XXX Isn't this always a barrier? */
18421 start_insn = next_nonnote_insn (start_insn);
18423 if (GET_CODE (start_insn) == CODE_LABEL
18424 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18425 && LABEL_NUSES (start_insn) == 1)
18430 else if (ANY_RETURN_P (body))
18432 start_insn = next_nonnote_insn (start_insn);
18433 if (GET_CODE (start_insn) == BARRIER)
18434 start_insn = next_nonnote_insn (start_insn);
18435 if (GET_CODE (start_insn) == CODE_LABEL
18436 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18437 && LABEL_NUSES (start_insn) == 1)
18440 seeking_return = 1;
18441 return_code = GET_CODE (body);
18450 gcc_assert (!arm_ccfsm_state || reverse);
18451 if (GET_CODE (insn) != JUMP_INSN)
18454 /* This jump might be paralleled with a clobber of the condition codes;
18455 the jump should always come first. */
18456 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18457 body = XVECEXP (body, 0, 0);
18460 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18461 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18464 int fail = FALSE, succeed = FALSE;
18465 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18466 int then_not_else = TRUE;
18467 rtx this_insn = start_insn, label = 0;
18469 /* Register the insn jumped to. */
18472 if (!seeking_return)
18473 label = XEXP (SET_SRC (body), 0);
18475 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18476 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18477 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18479 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18480 then_not_else = FALSE;
18482 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18484 seeking_return = 1;
18485 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18487 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18489 seeking_return = 1;
18490 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18491 then_not_else = FALSE;
18494 gcc_unreachable ();
18496 /* See how many insns this branch skips, and what kind of insns. If all
18497 insns are okay, and the label or unconditional branch to the same
18498 label is not too far away, succeed. */
18499 for (insns_skipped = 0;
18500 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18504 this_insn = next_nonnote_insn (this_insn);
18508 switch (GET_CODE (this_insn))
18511 /* Succeed if it is the target label, otherwise fail since
18512 control falls in from somewhere else. */
18513 if (this_insn == label)
18515 arm_ccfsm_state = 1;
18523 /* Succeed if the following insn is the target label.
18525 If return insns are used then the last insn in a function
18526 will be a barrier. */
18527 this_insn = next_nonnote_insn (this_insn);
18528 if (this_insn && this_insn == label)
18530 arm_ccfsm_state = 1;
18538 /* The AAPCS says that conditional calls should not be
18539 used since they make interworking inefficient (the
18540 linker can't transform BL<cond> into BLX). That's
18541 only a problem if the machine has BLX. */
18548 /* Succeed if the following insn is the target label, or
18549 if the following two insns are a barrier and the
18551 this_insn = next_nonnote_insn (this_insn);
18552 if (this_insn && GET_CODE (this_insn) == BARRIER)
18553 this_insn = next_nonnote_insn (this_insn);
18555 if (this_insn && this_insn == label
18556 && insns_skipped < max_insns_skipped)
18558 arm_ccfsm_state = 1;
18566 /* If this is an unconditional branch to the same label, succeed.
18567 If it is to another label, do nothing. If it is conditional,
18569 /* XXX Probably, the tests for SET and the PC are
18572 scanbody = PATTERN (this_insn);
18573 if (GET_CODE (scanbody) == SET
18574 && GET_CODE (SET_DEST (scanbody)) == PC)
18576 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18577 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18579 arm_ccfsm_state = 2;
18582 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18585 /* Fail if a conditional return is undesirable (e.g. on a
18586 StrongARM), but still allow this if optimizing for size. */
18587 else if (GET_CODE (scanbody) == return_code
18588 && !use_return_insn (TRUE, NULL)
18591 else if (GET_CODE (scanbody) == return_code)
18593 arm_ccfsm_state = 2;
18596 else if (GET_CODE (scanbody) == PARALLEL)
18598 switch (get_attr_conds (this_insn))
18608 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18613 /* Instructions using or affecting the condition codes make it
18615 scanbody = PATTERN (this_insn);
18616 if (!(GET_CODE (scanbody) == SET
18617 || GET_CODE (scanbody) == PARALLEL)
18618 || get_attr_conds (this_insn) != CONDS_NOCOND)
18621 /* A conditional Cirrus instruction must be followed by
18622 a non-Cirrus instruction. However, this function
18623 conditionalizes instructions, and by the time we
18624 get here we can no longer add instructions (nops),
18625 because shorten_branches() has already been
18626 called; so to be safe we simply do not
18627 conditionalize Cirrus instructions.
18628 if (GET_CODE (scanbody) != USE
18629 && GET_CODE (scanbody) != CLOBBER
18630 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18640 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18641 arm_target_label = CODE_LABEL_NUMBER (label);
18644 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18646 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18648 this_insn = next_nonnote_insn (this_insn);
18649 gcc_assert (!this_insn
18650 || (GET_CODE (this_insn) != BARRIER
18651 && GET_CODE (this_insn) != CODE_LABEL));
18655 /* Oh, dear! We ran off the end... give up. */
18656 extract_constrain_insn_cached (insn);
18657 arm_ccfsm_state = 0;
18658 arm_target_insn = NULL;
18661 arm_target_insn = this_insn;
18664 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18667 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18669 if (reverse || then_not_else)
18670 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18673 /* Restore recog_data (getting the attributes of other insns can
18674 destroy this array, but final.c assumes that it remains intact
18675 across this call). */
18676 extract_constrain_insn_cached (insn);
18680 /* Output IT instructions. */
18682 thumb2_asm_output_opcode (FILE * stream)
18687 if (arm_condexec_mask)
18689 for (n = 0; n < arm_condexec_masklen; n++)
18690 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18692 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18693 arm_condition_codes[arm_current_cc]);
18694 arm_condexec_mask = 0;
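/* Worked example (illustrative): with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0x5 (bits read
   low to high: t, e, t), the loop builds buff = "tet" and the hook
   prints "itet eq", i.e. an IT block whose three instructions execute
   if EQ, NE and EQ respectively. */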
18698 /* Returns true if REGNO is a valid register
18699 for holding a quantity of type MODE. */
18701 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18703 if (GET_MODE_CLASS (mode) == MODE_CC)
18704 return (regno == CC_REGNUM
18705 || (TARGET_HARD_FLOAT && TARGET_VFP
18706 && regno == VFPCC_REGNUM));
18709 /* For the Thumb we only allow values bigger than SImode in
18710 registers 0 - 6, so that there is always a second low
18711 register available to hold the upper part of the value.
18712 We probably ought to ensure that the register is the
18713 start of an even numbered register pair. */
18714 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18716 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18717 && IS_CIRRUS_REGNUM (regno))
18718 /* We have outlawed SI values in Cirrus registers because they
18719 reside in the lower 32 bits, but SF values reside in the
18720 upper 32 bits. This causes gcc all sorts of grief. We can't
18721 even split the registers into pairs because Cirrus SI values
18722 get sign-extended to 64 bits -- aldyh. */
18723 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18725 if (TARGET_HARD_FLOAT && TARGET_VFP
18726 && IS_VFP_REGNUM (regno))
18728 if (mode == SFmode || mode == SImode)
18729 return VFP_REGNO_OK_FOR_SINGLE (regno);
18731 if (mode == DFmode)
18732 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18734 /* VFP registers can hold HFmode values, but there is no point in
18735 putting them there unless we have hardware conversion insns. */
18736 if (mode == HFmode)
18737 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18740 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18741 || (VALID_NEON_QREG_MODE (mode)
18742 && NEON_REGNO_OK_FOR_QUAD (regno))
18743 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18744 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18745 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18746 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18747 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18752 if (TARGET_REALLY_IWMMXT)
18754 if (IS_IWMMXT_GR_REGNUM (regno))
18755 return mode == SImode;
18757 if (IS_IWMMXT_REGNUM (regno))
18758 return VALID_IWMMXT_REG_MODE (mode);
18761 /* We allow almost any value to be stored in the general registers.
18762 Restrict doubleword quantities to even register pairs so that we can
18763 use ldrd. Do not allow very large Neon structure opaque modes in
18764 general registers; they would use too many. */
18765 if (regno <= LAST_ARM_REGNUM)
18766 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18767 && ARM_NUM_REGS (mode) <= 4;
18769 if (regno == FRAME_POINTER_REGNUM
18770 || regno == ARG_POINTER_REGNUM)
18771 /* We only allow integers in the fake hard registers. */
18772 return GET_MODE_CLASS (mode) == MODE_INT;
18774 /* The only registers left are the FPA registers
18775 which we only allow to hold FP values. */
18776 return (TARGET_HARD_FLOAT && TARGET_FPA
18777 && GET_MODE_CLASS (mode) == MODE_FLOAT
18778 && regno >= FIRST_FPA_REGNUM
18779 && regno <= LAST_FPA_REGNUM);
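/* Concrete cases (ours, for illustration): with TARGET_LDRD a DImode
   value is accepted in the even pair r0/r1 but rejected starting at the
   odd register r1, so ldrd/strd remain usable; a large Neon structure
   mode such as OImode (32 bytes, i.e. more than 4 core registers) is
   refused outright. */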
18782 /* Implement MODES_TIEABLE_P. */
18785 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18787 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18790 /* We specifically want to allow elements of "structure" modes to
18791 be tieable to the structure. This more general condition allows
18792 other rarer situations too. */
18794 && (VALID_NEON_DREG_MODE (mode1)
18795 || VALID_NEON_QREG_MODE (mode1)
18796 || VALID_NEON_STRUCT_MODE (mode1))
18797 && (VALID_NEON_DREG_MODE (mode2)
18798 || VALID_NEON_QREG_MODE (mode2)
18799 || VALID_NEON_STRUCT_MODE (mode2)))
18805 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18806 not used in ARM mode. */
18809 arm_regno_class (int regno)
18813 if (regno == STACK_POINTER_REGNUM)
18815 if (regno == CC_REGNUM)
18822 if (TARGET_THUMB2 && regno < 8)
18825 if ( regno <= LAST_ARM_REGNUM
18826 || regno == FRAME_POINTER_REGNUM
18827 || regno == ARG_POINTER_REGNUM)
18828 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18830 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18831 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18833 if (IS_CIRRUS_REGNUM (regno))
18834 return CIRRUS_REGS;
18836 if (IS_VFP_REGNUM (regno))
18838 if (regno <= D7_VFP_REGNUM)
18839 return VFP_D0_D7_REGS;
18840 else if (regno <= LAST_LO_VFP_REGNUM)
18841 return VFP_LO_REGS;
18843 return VFP_HI_REGS;
18846 if (IS_IWMMXT_REGNUM (regno))
18847 return IWMMXT_REGS;
18849 if (IS_IWMMXT_GR_REGNUM (regno))
18850 return IWMMXT_GR_REGS;
18855 /* Handle a special case when computing the offset
18856 of an argument from the frame pointer. */
18858 arm_debugger_arg_offset (int value, rtx addr)
18862 /* We are only interested if dbxout_parms() failed to compute the offset. */
18866 /* We can only cope with the case where the address is held in a register. */
18867 if (GET_CODE (addr) != REG)
18870 /* If we are using the frame pointer to point at the argument, then
18871 an offset of 0 is correct. */
18872 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18875 /* If we are using the stack pointer to point at the
18876 argument, then an offset of 0 is correct. */
18877 /* ??? Check this is consistent with thumb2 frame layout. */
18878 if ((TARGET_THUMB || !frame_pointer_needed)
18879 && REGNO (addr) == SP_REGNUM)
18882 /* Oh dear. The argument is pointed to by a register rather
18883 than being held in a register, or being stored at a known
18884 offset from the frame pointer. Since GDB only understands
18885 those two kinds of argument we must translate the address
18886 held in the register into an offset from the frame pointer.
18887 We do this by searching through the insns for the function
18888 looking to see where this register gets its value. If the
18889 register is initialized from the frame pointer plus an offset
18890 then we are in luck and we can continue, otherwise we give up.
18892 This code is exercised by producing debugging information
18893 for a function with arguments like this:
18895 double func (double a, double b, int c, double d) {return d;}
18897 Without this code the stab for parameter 'd' will be set to
18898 an offset of 0 from the frame pointer, rather than 8. */
18900 /* The if() statement says:
18902 If the insn is a normal instruction
18903 and if the insn is setting the value in a register
18904 and if the register being set is the register holding the address of the argument
18905 and if the address is computed by an addition
18906 that involves adding to a register
18907 which is the frame pointer
18912 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18914 if ( GET_CODE (insn) == INSN
18915 && GET_CODE (PATTERN (insn)) == SET
18916 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18917 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18918 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18919 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18920 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18923 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18932 warning (0, "unable to compute real location of stacked parameter");
18933 value = 8; /* XXX magic hack */
18953 T_MAX /* Size of enum. Keep last. */
18954 } neon_builtin_type_mode;
18956 #define TYPE_MODE_BIT(X) (1 << (X))
18958 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18959 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18960 | TYPE_MODE_BIT (T_DI))
18961 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18962 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18963 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18965 #define v8qi_UP T_V8QI
18966 #define v4hi_UP T_V4HI
18967 #define v2si_UP T_V2SI
18968 #define v2sf_UP T_V2SF
18970 #define v16qi_UP T_V16QI
18971 #define v8hi_UP T_V8HI
18972 #define v4si_UP T_V4SI
18973 #define v4sf_UP T_V4SF
18974 #define v2di_UP T_V2DI
18979 #define UP(X) X##_UP
19012 NEON_LOADSTRUCTLANE,
19014 NEON_STORESTRUCTLANE,
19023 const neon_itype itype;
19024 const neon_builtin_type_mode mode;
19025 const enum insn_code code;
19026 unsigned int fcode;
19027 } neon_builtin_datum;
19029 #define CF(N,X) CODE_FOR_neon_##N##X
19031 #define VAR1(T, N, A) \
19032 {#N, NEON_##T, UP (A), CF (N, A), 0}
19033 #define VAR2(T, N, A, B) \
19035 {#N, NEON_##T, UP (B), CF (N, B), 0}
19036 #define VAR3(T, N, A, B, C) \
19037 VAR2 (T, N, A, B), \
19038 {#N, NEON_##T, UP (C), CF (N, C), 0}
19039 #define VAR4(T, N, A, B, C, D) \
19040 VAR3 (T, N, A, B, C), \
19041 {#N, NEON_##T, UP (D), CF (N, D), 0}
19042 #define VAR5(T, N, A, B, C, D, E) \
19043 VAR4 (T, N, A, B, C, D), \
19044 {#N, NEON_##T, UP (E), CF (N, E), 0}
19045 #define VAR6(T, N, A, B, C, D, E, F) \
19046 VAR5 (T, N, A, B, C, D, E), \
19047 {#N, NEON_##T, UP (F), CF (N, F), 0}
19048 #define VAR7(T, N, A, B, C, D, E, F, G) \
19049 VAR6 (T, N, A, B, C, D, E, F), \
19050 {#N, NEON_##T, UP (G), CF (N, G), 0}
19051 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19052 VAR7 (T, N, A, B, C, D, E, F, G), \
19053 {#N, NEON_##T, UP (H), CF (N, H), 0}
19054 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19055 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19056 {#N, NEON_##T, UP (I), CF (N, I), 0}
19057 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19058 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19059 {#N, NEON_##T, UP (J), CF (N, J), 0}
19061 /* The mode entries in the following table correspond to the "key" type of the
19062 instruction variant, i.e. equivalent to that which would be specified after
19063 the assembler mnemonic, which usually refers to the last vector operand.
19064 (Signed/unsigned/polynomial types are not differentiated between though, and
19065 are all mapped onto the same mode for a given element size.) The modes
19066 listed per instruction should be the same as those defined for that
19067 instruction's pattern in neon.md. */
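/* Sanity-check expansion (illustrative): the table entry
   VAR2 (BINOP, vcage, v2sf, v4sf) below expands to

       {"vcage", NEON_BINOP, T_V2SF, CODE_FOR_neon_vcagev2sf, 0},
       {"vcage", NEON_BINOP, T_V4SF, CODE_FOR_neon_vcagev4sf, 0},

   i.e. one neon_builtin_datum per "key" mode of the instruction. */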
19069 static neon_builtin_datum neon_builtin_data[] =
19071 VAR10 (BINOP, vadd,
19072 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19073 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19074 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19075 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19076 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19077 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19078 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19079 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19080 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19081 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19082 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19083 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19084 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19085 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19086 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19087 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19088 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19089 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19090 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19091 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19092 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19093 VAR2 (BINOP, vqdmull, v4hi, v2si),
19094 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19095 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19096 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19097 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19098 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19099 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19100 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19101 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19102 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19103 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19104 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19105 VAR10 (BINOP, vsub,
19106 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19107 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19108 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19109 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19110 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19111 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19112 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19113 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19114 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19115 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19116 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19117 VAR2 (BINOP, vcage, v2sf, v4sf),
19118 VAR2 (BINOP, vcagt, v2sf, v4sf),
19119 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19120 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19121 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19122 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19123 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19124 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19125 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19126 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19127 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19128 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19129 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19130 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19131 VAR2 (BINOP, vrecps, v2sf, v4sf),
19132 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19133 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19134 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19135 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19136 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19137 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19138 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19139 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19140 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19141 VAR2 (UNOP, vcnt, v8qi, v16qi),
19142 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19143 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19144 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19145 /* FIXME: vget_lane supports more variants than this! */
19146 VAR10 (GETLANE, vget_lane,
19147 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19148 VAR10 (SETLANE, vset_lane,
19149 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19150 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19151 VAR10 (DUP, vdup_n,
19152 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19153 VAR10 (DUPLANE, vdup_lane,
19154 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19155 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19156 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19157 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19158 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19159 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19160 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19161 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19162 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19163 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19164 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19165 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19166 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19167 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19168 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19169 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19170 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19171 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19172 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19173 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19174 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19175 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19176 VAR10 (BINOP, vext,
19177 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19178 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19179 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19180 VAR2 (UNOP, vrev16, v8qi, v16qi),
19181 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19182 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19183 VAR10 (SELECT, vbsl,
19184 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19185 VAR1 (VTBL, vtbl1, v8qi),
19186 VAR1 (VTBL, vtbl2, v8qi),
19187 VAR1 (VTBL, vtbl3, v8qi),
19188 VAR1 (VTBL, vtbl4, v8qi),
19189 VAR1 (VTBX, vtbx1, v8qi),
19190 VAR1 (VTBX, vtbx2, v8qi),
19191 VAR1 (VTBX, vtbx3, v8qi),
19192 VAR1 (VTBX, vtbx4, v8qi),
19193 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19194 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19195 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19196 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19197 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19198 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19199 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19200 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19201 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19202 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19203 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19204 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19205 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19206 VAR10 (LOAD1, vld1,
19207 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19208 VAR10 (LOAD1LANE, vld1_lane,
19209 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19210 VAR10 (LOAD1, vld1_dup,
19211 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19212 VAR10 (STORE1, vst1,
19213 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19214 VAR10 (STORE1LANE, vst1_lane,
19215 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19217 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19218 VAR7 (LOADSTRUCTLANE, vld2_lane,
19219 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19220 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19221 VAR9 (STORESTRUCT, vst2,
19222 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19223 VAR7 (STORESTRUCTLANE, vst2_lane,
19224 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19226 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19227 VAR7 (LOADSTRUCTLANE, vld3_lane,
19228 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19229 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19230 VAR9 (STORESTRUCT, vst3,
19231 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19232 VAR7 (STORESTRUCTLANE, vst3_lane,
19233 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19234 VAR9 (LOADSTRUCT, vld4,
19235 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19236 VAR7 (LOADSTRUCTLANE, vld4_lane,
19237 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19238 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19239 VAR9 (STORESTRUCT, vst4,
19240 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19241 VAR7 (STORESTRUCTLANE, vst4_lane,
19242 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19243 VAR10 (LOGICBINOP, vand,
19244 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19245 VAR10 (LOGICBINOP, vorr,
19246 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19247 VAR10 (BINOP, veor,
19248 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19249 VAR10 (LOGICBINOP, vbic,
19250 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19251 VAR10 (LOGICBINOP, vorn,
19252 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19267 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19268 symbolic names defined here (which would require too much duplication).
19272 ARM_BUILTIN_GETWCX,
19273 ARM_BUILTIN_SETWCX,
19277 ARM_BUILTIN_WAVG2BR,
19278 ARM_BUILTIN_WAVG2HR,
19279 ARM_BUILTIN_WAVG2B,
19280 ARM_BUILTIN_WAVG2H,
19287 ARM_BUILTIN_WMACSZ,
19289 ARM_BUILTIN_WMACUZ,
19292 ARM_BUILTIN_WSADBZ,
19294 ARM_BUILTIN_WSADHZ,
19296 ARM_BUILTIN_WALIGN,
19299 ARM_BUILTIN_TMIAPH,
19300 ARM_BUILTIN_TMIABB,
19301 ARM_BUILTIN_TMIABT,
19302 ARM_BUILTIN_TMIATB,
19303 ARM_BUILTIN_TMIATT,
19305 ARM_BUILTIN_TMOVMSKB,
19306 ARM_BUILTIN_TMOVMSKH,
19307 ARM_BUILTIN_TMOVMSKW,
19309 ARM_BUILTIN_TBCSTB,
19310 ARM_BUILTIN_TBCSTH,
19311 ARM_BUILTIN_TBCSTW,
19313 ARM_BUILTIN_WMADDS,
19314 ARM_BUILTIN_WMADDU,
19316 ARM_BUILTIN_WPACKHSS,
19317 ARM_BUILTIN_WPACKWSS,
19318 ARM_BUILTIN_WPACKDSS,
19319 ARM_BUILTIN_WPACKHUS,
19320 ARM_BUILTIN_WPACKWUS,
19321 ARM_BUILTIN_WPACKDUS,
19326 ARM_BUILTIN_WADDSSB,
19327 ARM_BUILTIN_WADDSSH,
19328 ARM_BUILTIN_WADDSSW,
19329 ARM_BUILTIN_WADDUSB,
19330 ARM_BUILTIN_WADDUSH,
19331 ARM_BUILTIN_WADDUSW,
19335 ARM_BUILTIN_WSUBSSB,
19336 ARM_BUILTIN_WSUBSSH,
19337 ARM_BUILTIN_WSUBSSW,
19338 ARM_BUILTIN_WSUBUSB,
19339 ARM_BUILTIN_WSUBUSH,
19340 ARM_BUILTIN_WSUBUSW,
19347 ARM_BUILTIN_WCMPEQB,
19348 ARM_BUILTIN_WCMPEQH,
19349 ARM_BUILTIN_WCMPEQW,
19350 ARM_BUILTIN_WCMPGTUB,
19351 ARM_BUILTIN_WCMPGTUH,
19352 ARM_BUILTIN_WCMPGTUW,
19353 ARM_BUILTIN_WCMPGTSB,
19354 ARM_BUILTIN_WCMPGTSH,
19355 ARM_BUILTIN_WCMPGTSW,
19357 ARM_BUILTIN_TEXTRMSB,
19358 ARM_BUILTIN_TEXTRMSH,
19359 ARM_BUILTIN_TEXTRMSW,
19360 ARM_BUILTIN_TEXTRMUB,
19361 ARM_BUILTIN_TEXTRMUH,
19362 ARM_BUILTIN_TEXTRMUW,
19363 ARM_BUILTIN_TINSRB,
19364 ARM_BUILTIN_TINSRH,
19365 ARM_BUILTIN_TINSRW,
19367 ARM_BUILTIN_WMAXSW,
19368 ARM_BUILTIN_WMAXSH,
19369 ARM_BUILTIN_WMAXSB,
19370 ARM_BUILTIN_WMAXUW,
19371 ARM_BUILTIN_WMAXUH,
19372 ARM_BUILTIN_WMAXUB,
19373 ARM_BUILTIN_WMINSW,
19374 ARM_BUILTIN_WMINSH,
19375 ARM_BUILTIN_WMINSB,
19376 ARM_BUILTIN_WMINUW,
19377 ARM_BUILTIN_WMINUH,
19378 ARM_BUILTIN_WMINUB,
19380 ARM_BUILTIN_WMULUM,
19381 ARM_BUILTIN_WMULSM,
19382 ARM_BUILTIN_WMULUL,
19384 ARM_BUILTIN_PSADBH,
19385 ARM_BUILTIN_WSHUFH,
19399 ARM_BUILTIN_WSLLHI,
19400 ARM_BUILTIN_WSLLWI,
19401 ARM_BUILTIN_WSLLDI,
19402 ARM_BUILTIN_WSRAHI,
19403 ARM_BUILTIN_WSRAWI,
19404 ARM_BUILTIN_WSRADI,
19405 ARM_BUILTIN_WSRLHI,
19406 ARM_BUILTIN_WSRLWI,
19407 ARM_BUILTIN_WSRLDI,
19408 ARM_BUILTIN_WRORHI,
19409 ARM_BUILTIN_WRORWI,
19410 ARM_BUILTIN_WRORDI,
19412 ARM_BUILTIN_WUNPCKIHB,
19413 ARM_BUILTIN_WUNPCKIHH,
19414 ARM_BUILTIN_WUNPCKIHW,
19415 ARM_BUILTIN_WUNPCKILB,
19416 ARM_BUILTIN_WUNPCKILH,
19417 ARM_BUILTIN_WUNPCKILW,
19419 ARM_BUILTIN_WUNPCKEHSB,
19420 ARM_BUILTIN_WUNPCKEHSH,
19421 ARM_BUILTIN_WUNPCKEHSW,
19422 ARM_BUILTIN_WUNPCKEHUB,
19423 ARM_BUILTIN_WUNPCKEHUH,
19424 ARM_BUILTIN_WUNPCKEHUW,
19425 ARM_BUILTIN_WUNPCKELSB,
19426 ARM_BUILTIN_WUNPCKELSH,
19427 ARM_BUILTIN_WUNPCKELSW,
19428 ARM_BUILTIN_WUNPCKELUB,
19429 ARM_BUILTIN_WUNPCKELUH,
19430 ARM_BUILTIN_WUNPCKELUW,
19432 ARM_BUILTIN_THREAD_POINTER,
19434 ARM_BUILTIN_NEON_BASE,
19436 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19439 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19442 arm_init_neon_builtins (void)
19444 unsigned int i, fcode;
19447 tree neon_intQI_type_node;
19448 tree neon_intHI_type_node;
19449 tree neon_polyQI_type_node;
19450 tree neon_polyHI_type_node;
19451 tree neon_intSI_type_node;
19452 tree neon_intDI_type_node;
19453 tree neon_float_type_node;
19455 tree intQI_pointer_node;
19456 tree intHI_pointer_node;
19457 tree intSI_pointer_node;
19458 tree intDI_pointer_node;
19459 tree float_pointer_node;
19461 tree const_intQI_node;
19462 tree const_intHI_node;
19463 tree const_intSI_node;
19464 tree const_intDI_node;
19465 tree const_float_node;
19467 tree const_intQI_pointer_node;
19468 tree const_intHI_pointer_node;
19469 tree const_intSI_pointer_node;
19470 tree const_intDI_pointer_node;
19471 tree const_float_pointer_node;
19473 tree V8QI_type_node;
19474 tree V4HI_type_node;
19475 tree V2SI_type_node;
19476 tree V2SF_type_node;
19477 tree V16QI_type_node;
19478 tree V8HI_type_node;
19479 tree V4SI_type_node;
19480 tree V4SF_type_node;
19481 tree V2DI_type_node;
19483 tree intUQI_type_node;
19484 tree intUHI_type_node;
19485 tree intUSI_type_node;
19486 tree intUDI_type_node;
19488 tree intEI_type_node;
19489 tree intOI_type_node;
19490 tree intCI_type_node;
19491 tree intXI_type_node;
19493 tree V8QI_pointer_node;
19494 tree V4HI_pointer_node;
19495 tree V2SI_pointer_node;
19496 tree V2SF_pointer_node;
19497 tree V16QI_pointer_node;
19498 tree V8HI_pointer_node;
19499 tree V4SI_pointer_node;
19500 tree V4SF_pointer_node;
19501 tree V2DI_pointer_node;
19503 tree void_ftype_pv8qi_v8qi_v8qi;
19504 tree void_ftype_pv4hi_v4hi_v4hi;
19505 tree void_ftype_pv2si_v2si_v2si;
19506 tree void_ftype_pv2sf_v2sf_v2sf;
19507 tree void_ftype_pdi_di_di;
19508 tree void_ftype_pv16qi_v16qi_v16qi;
19509 tree void_ftype_pv8hi_v8hi_v8hi;
19510 tree void_ftype_pv4si_v4si_v4si;
19511 tree void_ftype_pv4sf_v4sf_v4sf;
19512 tree void_ftype_pv2di_v2di_v2di;
19514 tree reinterp_ftype_dreg[5][5];
19515 tree reinterp_ftype_qreg[5][5];
19516 tree dreg_types[5], qreg_types[5];
19518 /* Create distinguished type nodes for NEON vector element types,
19519 and pointers to values of such types, so we can detect them later. */
19520 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19521 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19522 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19523 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19524 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19525 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19526 neon_float_type_node = make_node (REAL_TYPE);
19527 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19528 layout_type (neon_float_type_node);
19530 /* Define typedefs which exactly correspond to the modes we are basing vector
19531 types on. If you change these names you'll need to change
19532 the table used by arm_mangle_type too. */
19533 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19534 "__builtin_neon_qi");
19535 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19536 "__builtin_neon_hi");
19537 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19538 "__builtin_neon_si");
19539 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19540 "__builtin_neon_sf");
19541 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19542 "__builtin_neon_di");
19543 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19544 "__builtin_neon_poly8");
19545 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19546 "__builtin_neon_poly16");
19548 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19549 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19550 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19551 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19552 float_pointer_node = build_pointer_type (neon_float_type_node);
19554 /* Next create constant-qualified versions of the above types. */
19555 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19557 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19559 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19561 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19563 const_float_node = build_qualified_type (neon_float_type_node,
19566 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19567 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19568 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19569 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19570 const_float_pointer_node = build_pointer_type (const_float_node);
19572 /* Now create vector types based on our NEON element types. */
19573 /* 64-bit vectors. */
19575 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19577 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19579 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19581 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19582 /* 128-bit vectors. */
19584 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19586 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19588 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19590 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19592 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19594 /* Unsigned integer types for various mode sizes. */
19595 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19596 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19597 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19598 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19600 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19601 "__builtin_neon_uqi");
19602 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19603 "__builtin_neon_uhi");
19604 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19605 "__builtin_neon_usi");
19606 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19607 "__builtin_neon_udi");
19609 /* Opaque integer types for structures of vectors. */
19610 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19611 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19612 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19613 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19615 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19616 "__builtin_neon_ti");
19617 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19618 "__builtin_neon_ei");
19619 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19620 "__builtin_neon_oi");
19621 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19622 "__builtin_neon_ci");
19623 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19624 "__builtin_neon_xi");
19626 /* Pointers to vector types. */
19627 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19628 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19629 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19630 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19631 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19632 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19633 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19634 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19635 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19637 /* Operations which return results as pairs. */
19638 void_ftype_pv8qi_v8qi_v8qi =
19639 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19640 V8QI_type_node, NULL);
19641 void_ftype_pv4hi_v4hi_v4hi =
19642 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19643 V4HI_type_node, NULL);
19644 void_ftype_pv2si_v2si_v2si =
19645 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19646 V2SI_type_node, NULL);
19647 void_ftype_pv2sf_v2sf_v2sf =
19648 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19649 V2SF_type_node, NULL);
19650 void_ftype_pdi_di_di =
19651 build_function_type_list (void_type_node, intDI_pointer_node,
19652 neon_intDI_type_node, neon_intDI_type_node, NULL);
19653 void_ftype_pv16qi_v16qi_v16qi =
19654 build_function_type_list (void_type_node, V16QI_pointer_node,
19655 V16QI_type_node, V16QI_type_node, NULL);
19656 void_ftype_pv8hi_v8hi_v8hi =
19657 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19658 V8HI_type_node, NULL);
19659 void_ftype_pv4si_v4si_v4si =
19660 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19661 V4SI_type_node, NULL);
19662 void_ftype_pv4sf_v4sf_v4sf =
19663 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19664 V4SF_type_node, NULL);
19665 void_ftype_pv2di_v2di_v2di =
19666 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19667 V2DI_type_node, NULL);
19669 dreg_types[0] = V8QI_type_node;
19670 dreg_types[1] = V4HI_type_node;
19671 dreg_types[2] = V2SI_type_node;
19672 dreg_types[3] = V2SF_type_node;
19673 dreg_types[4] = neon_intDI_type_node;
19675 qreg_types[0] = V16QI_type_node;
19676 qreg_types[1] = V8HI_type_node;
19677 qreg_types[2] = V4SI_type_node;
19678 qreg_types[3] = V4SF_type_node;
19679 qreg_types[4] = V2DI_type_node;
19681 for (i = 0; i < 5; i++)
19684 for (j = 0; j < 5; j++)
19686 reinterp_ftype_dreg[i][j]
19687 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19688 reinterp_ftype_qreg[i][j]
19689 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
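/* Illustration: reinterp_ftype_dreg[0][1] built above is the type
   "V8QI (V4HI)", i.e. a function reinterpreting a 64-bit vector of
   four halfwords as eight bytes; the qreg table is the 128-bit
   analogue. */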
19693 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19694 i < ARRAY_SIZE (neon_builtin_data);
19697 neon_builtin_datum *d = &neon_builtin_data[i];
19699 const char* const modenames[] = {
19700 "v8qi", "v4hi", "v2si", "v2sf", "di",
19701 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19706 int is_load = 0, is_store = 0;
19708 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19715 case NEON_LOAD1LANE:
19716 case NEON_LOADSTRUCT:
19717 case NEON_LOADSTRUCTLANE:
19719 /* Fall through. */
19721 case NEON_STORE1LANE:
19722 case NEON_STORESTRUCT:
19723 case NEON_STORESTRUCTLANE:
19726 /* Fall through. */
19729 case NEON_LOGICBINOP:
19730 case NEON_SHIFTINSERT:
19737 case NEON_SHIFTIMM:
19738 case NEON_SHIFTACC:
19744 case NEON_LANEMULL:
19745 case NEON_LANEMULH:
19747 case NEON_SCALARMUL:
19748 case NEON_SCALARMULL:
19749 case NEON_SCALARMULH:
19750 case NEON_SCALARMAC:
19756 tree return_type = void_type_node, args = void_list_node;
19758 /* Build a function type directly from the insn_data for
19759 this builtin. The build_function_type() function takes
19760 care of removing duplicates for us. */
19761 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19765 if (is_load && k == 1)
19767 /* Neon load patterns always have the memory
19768 operand in the operand 1 position. */
19769 gcc_assert (insn_data[d->code].operand[k].predicate
19770 == neon_struct_operand);
19776 eltype = const_intQI_pointer_node;
19781 eltype = const_intHI_pointer_node;
19786 eltype = const_intSI_pointer_node;
19791 eltype = const_float_pointer_node;
19796 eltype = const_intDI_pointer_node;
19799 default: gcc_unreachable ();
19802 else if (is_store && k == 0)
19804 /* Similarly, Neon store patterns use operand 0 as
19805 the memory location to store to. */
19806 gcc_assert (insn_data[d->code].operand[k].predicate
19807 == neon_struct_operand);
19813 eltype = intQI_pointer_node;
19818 eltype = intHI_pointer_node;
19823 eltype = intSI_pointer_node;
19828 eltype = float_pointer_node;
19833 eltype = intDI_pointer_node;
19836 default: gcc_unreachable ();
19841 switch (insn_data[d->code].operand[k].mode)
19843 case VOIDmode: eltype = void_type_node; break;
19845 case QImode: eltype = neon_intQI_type_node; break;
19846 case HImode: eltype = neon_intHI_type_node; break;
19847 case SImode: eltype = neon_intSI_type_node; break;
19848 case SFmode: eltype = neon_float_type_node; break;
19849 case DImode: eltype = neon_intDI_type_node; break;
19850 case TImode: eltype = intTI_type_node; break;
19851 case EImode: eltype = intEI_type_node; break;
19852 case OImode: eltype = intOI_type_node; break;
19853 case CImode: eltype = intCI_type_node; break;
19854 case XImode: eltype = intXI_type_node; break;
19855 /* 64-bit vectors. */
19856 case V8QImode: eltype = V8QI_type_node; break;
19857 case V4HImode: eltype = V4HI_type_node; break;
19858 case V2SImode: eltype = V2SI_type_node; break;
19859 case V2SFmode: eltype = V2SF_type_node; break;
19860 /* 128-bit vectors. */
19861 case V16QImode: eltype = V16QI_type_node; break;
19862 case V8HImode: eltype = V8HI_type_node; break;
19863 case V4SImode: eltype = V4SI_type_node; break;
19864 case V4SFmode: eltype = V4SF_type_node; break;
19865 case V2DImode: eltype = V2DI_type_node; break;
19866 default: gcc_unreachable ();
19870 if (k == 0 && !is_store)
19871 return_type = eltype;
19873 args = tree_cons (NULL_TREE, eltype, args);
19876 ftype = build_function_type (return_type, args);
19880 case NEON_RESULTPAIR:
19882 switch (insn_data[d->code].operand[1].mode)
19884 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19885 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19886 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19887 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19888 case DImode: ftype = void_ftype_pdi_di_di; break;
19889 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19890 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19891 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19892 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19893 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19894 default: gcc_unreachable ();
19899 case NEON_REINTERP:
19901 /* We iterate over 5 doubleword types, then 5 quadword
19902 types. */
19903 int rhs = d->mode % 5;
19904 switch (insn_data[d->code].operand[0].mode)
19906 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19907 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19908 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19909 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19910 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19911 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19912 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19913 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19914 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19915 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19916 default: gcc_unreachable ();
19922 gcc_unreachable ();
19925 gcc_assert (ftype != NULL);
19927 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19929 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19930 NULL_TREE);
19931 arm_builtin_decls[fcode] = decl;
19935 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19938 if ((MASK) & insn_flags) \
19941 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19942 BUILT_IN_MD, NULL, NULL_TREE); \
19943 arm_builtin_decls[CODE] = bdecl; \
19948 struct builtin_description
19950 const unsigned int mask;
19951 const enum insn_code icode;
19952 const char * const name;
19953 const enum arm_builtins code;
19954 const enum rtx_code comparison;
19955 const unsigned int flag;
19958 static const struct builtin_description bdesc_2arg[] =
19960 #define IWMMXT_BUILTIN(code, string, builtin) \
19961 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19962 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
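/* For example (illustrative only, not an extra table entry):
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) creates an entry mapping
   __builtin_arm_waddb onto the addv8qi3 insn pattern, guarded by
   FL_IWMMXT.  */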
19964 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19965 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19966 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19967 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19968 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19969 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19970 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19971 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19972 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19973 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19974 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19975 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19976 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19977 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19978 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19979 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19980 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19981 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19982 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19983 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19984 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19985 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19986 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19987 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19988 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19989 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19990 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19991 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19992 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19993 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19994 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19995 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19996 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19997 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19998 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19999 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
20000 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
20001 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
20002 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
20003 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
20004 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20005 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20006 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20007 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20008 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20009 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20010 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20011 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20012 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20013 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20014 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20015 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20016 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20017 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20018 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20019 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20020 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
20021 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
20023 #define IWMMXT_BUILTIN2(code, builtin) \
20024 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20026 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20027 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20028 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20029 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20030 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20031 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20032 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
20033 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20034 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
20035 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20036 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
20037 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
20038 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
20039 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20040 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
20041 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20042 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
20043 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
20044 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
20045 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20046 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
20047 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20048 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
20049 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
20050 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
20051 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
20052 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
20053 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
20054 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
20055 IWMMXT_BUILTIN2 (rordi3, WRORDI)
20056 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20057 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20060 static const struct builtin_description bdesc_1arg[] =
20062 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20063 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20064 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20065 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20066 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20067 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20068 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20069 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20070 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20071 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20072 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20073 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20074 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20075 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20076 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20077 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20078 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20079 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20082 /* Set up all the iWMMXt builtins. This is not called if
20083 TARGET_IWMMXT is zero. */
20086 arm_init_iwmmxt_builtins (void)
20088 const struct builtin_description * d;
20091 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20092 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20093 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20095 tree int_ftype_int
20096 = build_function_type_list (integer_type_node,
20097 integer_type_node, NULL_TREE);
20098 tree v8qi_ftype_v8qi_v8qi_int
20099 = build_function_type_list (V8QI_type_node,
20100 V8QI_type_node, V8QI_type_node,
20101 integer_type_node, NULL_TREE);
20102 tree v4hi_ftype_v4hi_int
20103 = build_function_type_list (V4HI_type_node,
20104 V4HI_type_node, integer_type_node, NULL_TREE);
20105 tree v2si_ftype_v2si_int
20106 = build_function_type_list (V2SI_type_node,
20107 V2SI_type_node, integer_type_node, NULL_TREE);
20108 tree v2si_ftype_di_di
20109 = build_function_type_list (V2SI_type_node,
20110 long_long_integer_type_node,
20111 long_long_integer_type_node,
20113 tree di_ftype_di_int
20114 = build_function_type_list (long_long_integer_type_node,
20115 long_long_integer_type_node,
20116 integer_type_node, NULL_TREE);
20117 tree di_ftype_di_int_int
20118 = build_function_type_list (long_long_integer_type_node,
20119 long_long_integer_type_node,
20121 integer_type_node, NULL_TREE);
20122 tree int_ftype_v8qi
20123 = build_function_type_list (integer_type_node,
20124 V8QI_type_node, NULL_TREE);
20125 tree int_ftype_v4hi
20126 = build_function_type_list (integer_type_node,
20127 V4HI_type_node, NULL_TREE);
20128 tree int_ftype_v2si
20129 = build_function_type_list (integer_type_node,
20130 V2SI_type_node, NULL_TREE);
20131 tree int_ftype_v8qi_int
20132 = build_function_type_list (integer_type_node,
20133 V8QI_type_node, integer_type_node, NULL_TREE);
20134 tree int_ftype_v4hi_int
20135 = build_function_type_list (integer_type_node,
20136 V4HI_type_node, integer_type_node, NULL_TREE);
20137 tree int_ftype_v2si_int
20138 = build_function_type_list (integer_type_node,
20139 V2SI_type_node, integer_type_node, NULL_TREE);
20140 tree v8qi_ftype_v8qi_int_int
20141 = build_function_type_list (V8QI_type_node,
20142 V8QI_type_node, integer_type_node,
20143 integer_type_node, NULL_TREE);
20144 tree v4hi_ftype_v4hi_int_int
20145 = build_function_type_list (V4HI_type_node,
20146 V4HI_type_node, integer_type_node,
20147 integer_type_node, NULL_TREE);
20148 tree v2si_ftype_v2si_int_int
20149 = build_function_type_list (V2SI_type_node,
20150 V2SI_type_node, integer_type_node,
20151 integer_type_node, NULL_TREE);
20152 /* Miscellaneous. */
20153 tree v8qi_ftype_v4hi_v4hi
20154 = build_function_type_list (V8QI_type_node,
20155 V4HI_type_node, V4HI_type_node, NULL_TREE);
20156 tree v4hi_ftype_v2si_v2si
20157 = build_function_type_list (V4HI_type_node,
20158 V2SI_type_node, V2SI_type_node, NULL_TREE);
20159 tree v2si_ftype_v4hi_v4hi
20160 = build_function_type_list (V2SI_type_node,
20161 V4HI_type_node, V4HI_type_node, NULL_TREE);
20162 tree v2si_ftype_v8qi_v8qi
20163 = build_function_type_list (V2SI_type_node,
20164 V8QI_type_node, V8QI_type_node, NULL_TREE);
20165 tree v4hi_ftype_v4hi_di
20166 = build_function_type_list (V4HI_type_node,
20167 V4HI_type_node, long_long_integer_type_node,
20169 tree v2si_ftype_v2si_di
20170 = build_function_type_list (V2SI_type_node,
20171 V2SI_type_node, long_long_integer_type_node,
20173 tree void_ftype_int_int
20174 = build_function_type_list (void_type_node,
20175 integer_type_node, integer_type_node,
20176 NULL_TREE);
20177 tree di_ftype_void
20178 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20179 tree di_ftype_v8qi
20180 = build_function_type_list (long_long_integer_type_node,
20181 V8QI_type_node, NULL_TREE);
20182 tree di_ftype_v4hi
20183 = build_function_type_list (long_long_integer_type_node,
20184 V4HI_type_node, NULL_TREE);
20185 tree di_ftype_v2si
20186 = build_function_type_list (long_long_integer_type_node,
20187 V2SI_type_node, NULL_TREE);
20188 tree v2si_ftype_v4hi
20189 = build_function_type_list (V2SI_type_node,
20190 V4HI_type_node, NULL_TREE);
20191 tree v4hi_ftype_v8qi
20192 = build_function_type_list (V4HI_type_node,
20193 V8QI_type_node, NULL_TREE);
20195 tree di_ftype_di_v4hi_v4hi
20196 = build_function_type_list (long_long_unsigned_type_node,
20197 long_long_unsigned_type_node,
20198 V4HI_type_node, V4HI_type_node,
20201 tree di_ftype_v4hi_v4hi
20202 = build_function_type_list (long_long_unsigned_type_node,
20203 V4HI_type_node,V4HI_type_node,
20206 /* Normal vector binops. */
20207 tree v8qi_ftype_v8qi_v8qi
20208 = build_function_type_list (V8QI_type_node,
20209 V8QI_type_node, V8QI_type_node, NULL_TREE);
20210 tree v4hi_ftype_v4hi_v4hi
20211 = build_function_type_list (V4HI_type_node,
20212 V4HI_type_node,V4HI_type_node, NULL_TREE);
20213 tree v2si_ftype_v2si_v2si
20214 = build_function_type_list (V2SI_type_node,
20215 V2SI_type_node, V2SI_type_node, NULL_TREE);
20216 tree di_ftype_di_di
20217 = build_function_type_list (long_long_unsigned_type_node,
20218 long_long_unsigned_type_node,
20219 long_long_unsigned_type_node,
20222 /* Add all builtins that are more or less simple operations on two
20223 operands. */
20224 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20226 /* Use one of the operands; the target can have a different mode for
20227 mask-generating compares. */
20228 enum machine_mode mode;
20234 mode = insn_data[d->icode].operand[1].mode;
20239 type = v8qi_ftype_v8qi_v8qi;
20242 type = v4hi_ftype_v4hi_v4hi;
20245 type = v2si_ftype_v2si_v2si;
20248 type = di_ftype_di_di;
20252 gcc_unreachable ();
20255 def_mbuiltin (d->mask, d->name, type, d->code);
20258 /* Add the remaining MMX insns with somewhat more complicated types. */
20259 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20260 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20261 ARM_BUILTIN_ ## CODE)
20263 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20264 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20265 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20267 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20268 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20269 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20270 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20271 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20272 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20274 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20275 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20276 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20277 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20278 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20279 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20281 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20282 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20283 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20284 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20285 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20286 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20288 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20289 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20290 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20291 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20292 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20293 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20295 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20297 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20298 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20299 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20300 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20302 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20303 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20304 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20305 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20306 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20307 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20308 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20309 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20310 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20312 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20313 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20314 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20316 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20317 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20318 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20320 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20321 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20322 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20323 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20324 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20325 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20327 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20328 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20329 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20330 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20331 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20332 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20333 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20334 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20335 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20336 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20337 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20338 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20340 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20341 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20342 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20343 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20345 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20346 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20347 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20348 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20349 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20350 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20351 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20353 #undef iwmmx_mbuiltin
20357 arm_init_tls_builtins (void)
20361 ftype = build_function_type (ptr_type_node, void_list_node);
20362 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20363 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20364 NULL, NULL_TREE);
20365 TREE_NOTHROW (decl) = 1;
20366 TREE_READONLY (decl) = 1;
20367 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20371 arm_init_fp16_builtins (void)
20373 tree fp16_type = make_node (REAL_TYPE);
20374 TYPE_PRECISION (fp16_type) = 16;
20375 layout_type (fp16_type);
20376 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20380 arm_init_builtins (void)
20382 arm_init_tls_builtins ();
20384 if (TARGET_REALLY_IWMMXT)
20385 arm_init_iwmmxt_builtins ();
20387 if (TARGET_NEON)
20388 arm_init_neon_builtins ();
20390 if (arm_fp16_format)
20391 arm_init_fp16_builtins ();
20394 /* Return the ARM builtin for CODE. */
20397 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20399 if (code >= ARM_BUILTIN_MAX)
20400 return error_mark_node;
20402 return arm_builtin_decls[code];
20405 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20407 static const char *
20408 arm_invalid_parameter_type (const_tree t)
20410 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20411 return N_("function parameters cannot have __fp16 type");
20415 /* Implement TARGET_INVALID_RETURN_TYPE. */
20417 static const char *
20418 arm_invalid_return_type (const_tree t)
20420 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20421 return N_("functions cannot return __fp16 type");
20425 /* Implement TARGET_PROMOTED_TYPE. */
20428 arm_promoted_type (const_tree t)
20430 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20431 return float_type_node;
20435 /* Implement TARGET_CONVERT_TO_TYPE.
20436 Specifically, this hook implements the peculiarity of the ARM
20437 half-precision floating-point C semantics, which requires conversions
20438 between __fp16 and double to go through an intermediate conversion to float. */
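/* For example, given `__fp16 h; double d;', the conversion in `h = d'
   is expanded as if written `h = (__fp16) (float) d', and `d = h'
   likewise goes through an intermediate float.  */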
20441 arm_convert_to_type (tree type, tree expr)
20443 tree fromtype = TREE_TYPE (expr);
20444 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20446 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20447 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20448 return convert (type, convert (float_type_node, expr));
20452 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20453 This simply adds HFmode as a supported mode; even though we don't
20454 implement arithmetic on this type directly, it's supported by
20455 optabs conversions, much the way the double-word arithmetic is
20456 special-cased in the default hook. */
20459 arm_scalar_mode_supported_p (enum machine_mode mode)
20461 if (mode == HFmode)
20462 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20463 else if (ALL_FIXED_POINT_MODE_P (mode))
20464 return true;
20465 else
20466 return default_scalar_mode_supported_p (mode);
20469 /* Errors in the source file can cause expand_expr to return const0_rtx
20470 where we expect a vector. To avoid crashing, use one of the vector
20471 clear instructions. */
20474 safe_vector_operand (rtx x, enum machine_mode mode)
20476 if (x != const0_rtx)
20477 return x;
20478 x = gen_reg_rtx (mode);
20480 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20481 : gen_rtx_SUBREG (DImode, x, 0)));
20485 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20488 arm_expand_binop_builtin (enum insn_code icode,
20489 tree exp, rtx target)
20492 tree arg0 = CALL_EXPR_ARG (exp, 0);
20493 tree arg1 = CALL_EXPR_ARG (exp, 1);
20494 rtx op0 = expand_normal (arg0);
20495 rtx op1 = expand_normal (arg1);
20496 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20497 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20498 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20500 if (VECTOR_MODE_P (mode0))
20501 op0 = safe_vector_operand (op0, mode0);
20502 if (VECTOR_MODE_P (mode1))
20503 op1 = safe_vector_operand (op1, mode1);
20505 if (target == 0
20506 || GET_MODE (target) != tmode
20507 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20508 target = gen_reg_rtx (tmode);
20510 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20512 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20513 op0 = copy_to_mode_reg (mode0, op0);
20514 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20515 op1 = copy_to_mode_reg (mode1, op1);
20517 pat = GEN_FCN (icode) (target, op0, op1);
20524 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20527 arm_expand_unop_builtin (enum insn_code icode,
20528 tree exp, rtx target, int do_load)
20531 tree arg0 = CALL_EXPR_ARG (exp, 0);
20532 rtx op0 = expand_normal (arg0);
20533 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20534 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20536 if (target == 0
20537 || GET_MODE (target) != tmode
20538 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20539 target = gen_reg_rtx (tmode);
20541 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20544 if (VECTOR_MODE_P (mode0))
20545 op0 = safe_vector_operand (op0, mode0);
20547 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20548 op0 = copy_to_mode_reg (mode0, op0);
20551 pat = GEN_FCN (icode) (target, op0);
20559 NEON_ARG_COPY_TO_REG,
20560 NEON_ARG_CONSTANT,
20561 NEON_ARG_MEMORY,
20562 NEON_ARG_STOP
20563 } builtin_arg;
20565 #define NEON_MAX_BUILTIN_ARGS 5
20567 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20568 and return an expression for the accessed memory.
20570 The intrinsic function operates on a block of registers that has
20571 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20572 The function references the memory at EXP in mode MEM_MODE;
20573 this mode may be BLKmode if no more suitable mode is available. */
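/* A rough example: for a vld3-style load of three quadword vectors,
   REG_MODE is a 48-byte mode, so reg_size = 48; TYPE_MODE is a quadword
   type, so vector_size = 16 and nvectors = 3, and the memory reference
   is built as an array covering the whole 48-byte block.  */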
20576 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20577 enum machine_mode reg_mode,
20578 neon_builtin_type_mode type_mode)
20580 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20581 tree elem_type, upper_bound, array_type;
20583 /* Work out the size of the register block in bytes. */
20584 reg_size = GET_MODE_SIZE (reg_mode);
20586 /* Work out the size of each vector in bytes. */
20587 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20588 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20590 /* Work out how many vectors there are. */
20591 gcc_assert (reg_size % vector_size == 0);
20592 nvectors = reg_size / vector_size;
20594 /* Work out how many elements are being loaded or stored.
20595 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20596 and memory elements; anything else implies a lane load or store. */
20597 if (mem_mode == reg_mode)
20598 nelems = vector_size * nvectors;
20602 /* Work out the type of each element. */
20603 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20604 elem_type = TREE_TYPE (TREE_TYPE (exp));
20606 /* Create a type that describes the full access. */
20607 upper_bound = build_int_cst (size_type_node, nelems - 1);
20608 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20610 /* Dereference EXP using that type. */
20611 return fold_build2 (MEM_REF, array_type, exp,
20612 build_int_cst (build_pointer_type (array_type), 0));
20615 /* Expand a Neon builtin. */
20617 arm_expand_neon_args (rtx target, int icode, int have_retval,
20618 neon_builtin_type_mode type_mode,
20623 tree arg[NEON_MAX_BUILTIN_ARGS];
20624 rtx op[NEON_MAX_BUILTIN_ARGS];
20625 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20626 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20627 enum machine_mode other_mode;
20631 if (have_retval
20632 && (target == 0
20633 || GET_MODE (target) != tmode
20634 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20635 target = gen_reg_rtx (tmode);
20637 va_start (ap, exp);
20641 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20643 if (thisarg == NEON_ARG_STOP)
20647 opno = argc + have_retval;
20648 mode[argc] = insn_data[icode].operand[opno].mode;
20649 arg[argc] = CALL_EXPR_ARG (exp, argc);
20650 if (thisarg == NEON_ARG_MEMORY)
20652 other_mode = insn_data[icode].operand[1 - opno].mode;
20653 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20654 other_mode, type_mode);
20656 op[argc] = expand_normal (arg[argc]);
20660 case NEON_ARG_COPY_TO_REG:
20661 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20662 if (!(*insn_data[icode].operand[opno].predicate)
20663 (op[argc], mode[argc]))
20664 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20667 case NEON_ARG_CONSTANT:
20668 /* FIXME: This error message is somewhat unhelpful. */
20669 if (!(*insn_data[icode].operand[opno].predicate)
20670 (op[argc], mode[argc]))
20671 error ("argument must be a constant");
20674 case NEON_ARG_MEMORY:
20675 gcc_assert (MEM_P (op[argc]));
20676 PUT_MODE (op[argc], mode[argc]);
20677 /* ??? arm_neon.h uses the same built-in functions for signed
20678 and unsigned accesses, casting where necessary. This isn't
20679 alias safe. */
20680 set_mem_alias_set (op[argc], 0);
20681 if (!(*insn_data[icode].operand[opno].predicate)
20682 (op[argc], mode[argc]))
20683 op[argc] = (replace_equiv_address
20684 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20687 case NEON_ARG_STOP:
20688 gcc_unreachable ();
20701 pat = GEN_FCN (icode) (target, op[0]);
20705 pat = GEN_FCN (icode) (target, op[0], op[1]);
20709 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20713 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20717 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20721 gcc_unreachable ();
20727 pat = GEN_FCN (icode) (op[0]);
20731 pat = GEN_FCN (icode) (op[0], op[1]);
20735 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20739 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20743 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20747 gcc_unreachable ();
20758 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20759 constants defined per-instruction or per instruction-variant. Instead, the
20760 required info is looked up in the table neon_builtin_data. */
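/* Roughly, each entry's itype selects one of the argument patterns
   below: register operands become NEON_ARG_COPY_TO_REG, immediates
   NEON_ARG_CONSTANT, pointer operands of loads and stores
   NEON_ARG_MEMORY, and every pattern ends with NEON_ARG_STOP.  */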
20762 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20764 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20765 neon_itype itype = d->itype;
20766 enum insn_code icode = d->code;
20767 neon_builtin_type_mode type_mode = d->mode;
20774 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20775 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20779 case NEON_SCALARMUL:
20780 case NEON_SCALARMULL:
20781 case NEON_SCALARMULH:
20782 case NEON_SHIFTINSERT:
20783 case NEON_LOGICBINOP:
20784 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20785 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20789 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20790 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20791 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20795 case NEON_SHIFTIMM:
20796 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20797 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20801 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20802 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20806 case NEON_REINTERP:
20807 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20808 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20812 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20813 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20815 case NEON_RESULTPAIR:
20816 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20817 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20821 case NEON_LANEMULL:
20822 case NEON_LANEMULH:
20823 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20824 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20825 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20828 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20829 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20830 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20832 case NEON_SHIFTACC:
20833 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20834 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20835 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20837 case NEON_SCALARMAC:
20838 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20839 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20840 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20844 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20845 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20849 case NEON_LOADSTRUCT:
20850 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20851 NEON_ARG_MEMORY, NEON_ARG_STOP);
20853 case NEON_LOAD1LANE:
20854 case NEON_LOADSTRUCTLANE:
20855 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20856 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20860 case NEON_STORESTRUCT:
20861 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20862 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20864 case NEON_STORE1LANE:
20865 case NEON_STORESTRUCTLANE:
20866 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20867 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20871 gcc_unreachable ();
20874 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20876 neon_reinterpret (rtx dest, rtx src)
20878 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20881 /* Emit code to place a Neon pair result in memory locations (with equal
20882 registers). */
20883 void
20884 neon_emit_pair_result_insn (enum machine_mode mode,
20885 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20888 rtx mem = gen_rtx_MEM (mode, destaddr);
20889 rtx tmp1 = gen_reg_rtx (mode);
20890 rtx tmp2 = gen_reg_rtx (mode);
20892 emit_insn (intfn (tmp1, op1, op2, tmp2));
20894 emit_move_insn (mem, tmp1);
20895 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20896 emit_move_insn (mem, tmp2);
20899 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20900 not to early-clobber SRC registers in the process.
20902 We assume that the operands described by SRC and DEST represent a
20903 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20904 number of components into which the copy has been decomposed. */
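/* For example, copying {d0,d1} to {d1,d2} must move d1 into d2 before
   d1 is overwritten, so when the destination overlaps a lower-numbered
   source the component moves are emitted highest-numbered first.  */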
20906 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20910 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20911 || REGNO (operands[0]) < REGNO (operands[1]))
20913 for (i = 0; i < count; i++)
20915 operands[2 * i] = dest[i];
20916 operands[2 * i + 1] = src[i];
20921 for (i = 0; i < count; i++)
20923 operands[2 * i] = dest[count - i - 1];
20924 operands[2 * i + 1] = src[count - i - 1];
20929 /* Split operands into moves from op[1] + op[2] into op[0]. */
20932 neon_split_vcombine (rtx operands[3])
20934 unsigned int dest = REGNO (operands[0]);
20935 unsigned int src1 = REGNO (operands[1]);
20936 unsigned int src2 = REGNO (operands[2]);
20937 enum machine_mode halfmode = GET_MODE (operands[1]);
20938 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20939 rtx destlo, desthi;
20941 if (src1 == dest && src2 == dest + halfregs)
20943 /* No-op move. Can't split to nothing; emit something. */
20944 emit_note (NOTE_INSN_DELETED);
20948 /* Preserve register attributes for variable tracking. */
20949 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20950 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20951 GET_MODE_SIZE (halfmode));
20953 /* Special case of reversed high/low parts. Use VSWP. */
20954 if (src2 == dest && src1 == dest + halfregs)
20956 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20957 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20958 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20962 if (!reg_overlap_mentioned_p (operands[2], destlo))
20964 /* Try to avoid unnecessary moves if part of the result
20965 is in the right place already. */
20967 emit_move_insn (destlo, operands[1]);
20968 if (src2 != dest + halfregs)
20969 emit_move_insn (desthi, operands[2]);
20973 if (src2 != dest + halfregs)
20974 emit_move_insn (desthi, operands[2]);
20976 emit_move_insn (destlo, operands[1]);
20980 /* Expand an expression EXP that calls a built-in function,
20981 with result going to TARGET if that's convenient
20982 (and in mode MODE if that's convenient).
20983 SUBTARGET may be used as the target for computing one of EXP's operands.
20984 IGNORE is nonzero if the value is to be ignored. */
20987 arm_expand_builtin (tree exp,
20989 rtx subtarget ATTRIBUTE_UNUSED,
20990 enum machine_mode mode ATTRIBUTE_UNUSED,
20991 int ignore ATTRIBUTE_UNUSED)
20993 const struct builtin_description * d;
20994 enum insn_code icode;
20995 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21003 int fcode = DECL_FUNCTION_CODE (fndecl);
21005 enum machine_mode tmode;
21006 enum machine_mode mode0;
21007 enum machine_mode mode1;
21008 enum machine_mode mode2;
21010 if (fcode >= ARM_BUILTIN_NEON_BASE)
21011 return arm_expand_neon_builtin (fcode, exp, target);
21015 case ARM_BUILTIN_TEXTRMSB:
21016 case ARM_BUILTIN_TEXTRMUB:
21017 case ARM_BUILTIN_TEXTRMSH:
21018 case ARM_BUILTIN_TEXTRMUH:
21019 case ARM_BUILTIN_TEXTRMSW:
21020 case ARM_BUILTIN_TEXTRMUW:
21021 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21022 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21023 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21024 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21025 : CODE_FOR_iwmmxt_textrmw);
21027 arg0 = CALL_EXPR_ARG (exp, 0);
21028 arg1 = CALL_EXPR_ARG (exp, 1);
21029 op0 = expand_normal (arg0);
21030 op1 = expand_normal (arg1);
21031 tmode = insn_data[icode].operand[0].mode;
21032 mode0 = insn_data[icode].operand[1].mode;
21033 mode1 = insn_data[icode].operand[2].mode;
21035 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21036 op0 = copy_to_mode_reg (mode0, op0);
21037 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21039 /* @@@ better error message */
21040 error ("selector must be an immediate");
21041 return gen_reg_rtx (tmode);
21043 if (target == 0
21044 || GET_MODE (target) != tmode
21045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21046 target = gen_reg_rtx (tmode);
21047 pat = GEN_FCN (icode) (target, op0, op1);
21053 case ARM_BUILTIN_TINSRB:
21054 case ARM_BUILTIN_TINSRH:
21055 case ARM_BUILTIN_TINSRW:
21056 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21057 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21058 : CODE_FOR_iwmmxt_tinsrw);
21059 arg0 = CALL_EXPR_ARG (exp, 0);
21060 arg1 = CALL_EXPR_ARG (exp, 1);
21061 arg2 = CALL_EXPR_ARG (exp, 2);
21062 op0 = expand_normal (arg0);
21063 op1 = expand_normal (arg1);
21064 op2 = expand_normal (arg2);
21065 tmode = insn_data[icode].operand[0].mode;
21066 mode0 = insn_data[icode].operand[1].mode;
21067 mode1 = insn_data[icode].operand[2].mode;
21068 mode2 = insn_data[icode].operand[3].mode;
21070 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21071 op0 = copy_to_mode_reg (mode0, op0);
21072 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21073 op1 = copy_to_mode_reg (mode1, op1);
21074 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21076 /* @@@ better error message */
21077 error ("selector must be an immediate");
21080 if (target == 0
21081 || GET_MODE (target) != tmode
21082 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21083 target = gen_reg_rtx (tmode);
21084 pat = GEN_FCN (icode) (target, op0, op1, op2);
21090 case ARM_BUILTIN_SETWCX:
21091 arg0 = CALL_EXPR_ARG (exp, 0);
21092 arg1 = CALL_EXPR_ARG (exp, 1);
21093 op0 = force_reg (SImode, expand_normal (arg0));
21094 op1 = expand_normal (arg1);
21095 emit_insn (gen_iwmmxt_tmcr (op1, op0));
21098 case ARM_BUILTIN_GETWCX:
21099 arg0 = CALL_EXPR_ARG (exp, 0);
21100 op0 = expand_normal (arg0);
21101 target = gen_reg_rtx (SImode);
21102 emit_insn (gen_iwmmxt_tmrc (target, op0));
21105 case ARM_BUILTIN_WSHUFH:
21106 icode = CODE_FOR_iwmmxt_wshufh;
21107 arg0 = CALL_EXPR_ARG (exp, 0);
21108 arg1 = CALL_EXPR_ARG (exp, 1);
21109 op0 = expand_normal (arg0);
21110 op1 = expand_normal (arg1);
21111 tmode = insn_data[icode].operand[0].mode;
21112 mode1 = insn_data[icode].operand[1].mode;
21113 mode2 = insn_data[icode].operand[2].mode;
21115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21116 op0 = copy_to_mode_reg (mode1, op0);
21117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21119 /* @@@ better error message */
21120 error ("mask must be an immediate");
21123 if (target == 0
21124 || GET_MODE (target) != tmode
21125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21126 target = gen_reg_rtx (tmode);
21127 pat = GEN_FCN (icode) (target, op0, op1);
21133 case ARM_BUILTIN_WSADB:
21134 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21135 case ARM_BUILTIN_WSADH:
21136 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21137 case ARM_BUILTIN_WSADBZ:
21138 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21139 case ARM_BUILTIN_WSADHZ:
21140 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21142 /* Several three-argument builtins. */
21143 case ARM_BUILTIN_WMACS:
21144 case ARM_BUILTIN_WMACU:
21145 case ARM_BUILTIN_WALIGN:
21146 case ARM_BUILTIN_TMIA:
21147 case ARM_BUILTIN_TMIAPH:
21148 case ARM_BUILTIN_TMIATT:
21149 case ARM_BUILTIN_TMIATB:
21150 case ARM_BUILTIN_TMIABT:
21151 case ARM_BUILTIN_TMIABB:
21152 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21153 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21154 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21155 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21156 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21157 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21158 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21159 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21160 : CODE_FOR_iwmmxt_walign);
21161 arg0 = CALL_EXPR_ARG (exp, 0);
21162 arg1 = CALL_EXPR_ARG (exp, 1);
21163 arg2 = CALL_EXPR_ARG (exp, 2);
21164 op0 = expand_normal (arg0);
21165 op1 = expand_normal (arg1);
21166 op2 = expand_normal (arg2);
21167 tmode = insn_data[icode].operand[0].mode;
21168 mode0 = insn_data[icode].operand[1].mode;
21169 mode1 = insn_data[icode].operand[2].mode;
21170 mode2 = insn_data[icode].operand[3].mode;
21172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21173 op0 = copy_to_mode_reg (mode0, op0);
21174 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21175 op1 = copy_to_mode_reg (mode1, op1);
21176 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21177 op2 = copy_to_mode_reg (mode2, op2);
21178 if (target == 0
21179 || GET_MODE (target) != tmode
21180 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21181 target = gen_reg_rtx (tmode);
21182 pat = GEN_FCN (icode) (target, op0, op1, op2);
21188 case ARM_BUILTIN_WZERO:
21189 target = gen_reg_rtx (DImode);
21190 emit_insn (gen_iwmmxt_clrdi (target));
21193 case ARM_BUILTIN_THREAD_POINTER:
21194 return arm_load_tp (target);
21200 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21201 if (d->code == (const enum arm_builtins) fcode)
21202 return arm_expand_binop_builtin (d->icode, exp, target);
21204 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21205 if (d->code == (const enum arm_builtins) fcode)
21206 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21208 /* @@@ Should really do something sensible here. */
21212 /* Return the number (counting from 0) of
21213 the least significant set bit in MASK. */
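/* E.g. number_of_first_bit_set (0x18) is 3.  */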
21216 number_of_first_bit_set (unsigned mask)
21218 return ctz_hwi (mask);
21221 /* Like emit_multi_reg_push, but allowing for a different set of
21222 registers to be described as saved. MASK is the set of registers
21223 to be saved; REAL_REGS is the set of registers to be described as
21224 saved. If REAL_REGS is 0, only describe the stack adjustment. */
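/* For example, when high registers are saved via low scratch registers,
   MASK names the low registers actually pushed while REAL_REGS names the
   registers whose values they hold; passing REAL_REGS as 0 records only
   the stack adjustment for the unwinder.  */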
21227 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21229 unsigned long regno;
21230 rtx par[10], tmp, reg, insn;
21233 /* Build the parallel of the registers actually being stored. */
21234 for (i = 0; mask; ++i, mask &= mask - 1)
21236 regno = ctz_hwi (mask);
21237 reg = gen_rtx_REG (SImode, regno);
21239 if (i == 0)
21240 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21241 else
21242 tmp = gen_rtx_USE (VOIDmode, reg);
21247 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21248 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21249 tmp = gen_frame_mem (BLKmode, tmp);
21250 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21253 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21254 insn = emit_insn (tmp);
21256 /* Always build the stack adjustment note for unwind info. */
21257 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21258 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21261 /* Build the parallel of the registers recorded as saved for unwind. */
21262 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21264 regno = ctz_hwi (real_regs);
21265 reg = gen_rtx_REG (SImode, regno);
21267 tmp = plus_constant (stack_pointer_rtx, j * 4);
21268 tmp = gen_frame_mem (SImode, tmp);
21269 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21270 RTX_FRAME_RELATED_P (tmp) = 1;
21278 RTX_FRAME_RELATED_P (par[0]) = 1;
21279 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21282 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21287 /* Emit code to push or pop registers to or from the stack. F is the
21288 assembly file. MASK is the registers to pop. */
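/* E.g. thumb_pop (f, (1 << 0) | (1 << 1)) emits "pop {r0, r1}"; when
   MASK includes PC_REGNUM the code below may defer to thumb_exit
   instead of popping the PC directly.  */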
21290 thumb_pop (FILE *f, unsigned long mask)
21293 int lo_mask = mask & 0xFF;
21294 int pushed_words = 0;
21298 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21300 /* Special case. Do not generate a POP PC statement here, do it in
21301 thumb_exit: */
21302 thumb_exit (f, -1);
21306 fprintf (f, "\tpop\t{");
21308 /* Look at the low registers first. */
21309 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21313 asm_fprintf (f, "%r", regno);
21315 if ((lo_mask & ~1) != 0)
21322 if (mask & (1 << PC_REGNUM))
21324 /* Catch popping the PC. */
21325 if (TARGET_INTERWORK || TARGET_BACKTRACE
21326 || crtl->calls_eh_return)
21328 /* The PC is never popped directly; instead
21329 it is popped into r3 and then BX is used. */
21330 fprintf (f, "}\n");
21332 thumb_exit (f, -1);
21341 asm_fprintf (f, "%r", PC_REGNUM);
21345 fprintf (f, "}\n");
21348 /* Generate code to return from a thumb function.
21349 If 'reg_containing_return_addr' is -1, then the return address is
21350 actually on the stack, at the stack pointer. */
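/* In the simplest case this amounts to popping the return address into
   a free low register and issuing a BX on it; the book-keeping below
   handles return values and backtrace structures occupying the
   candidate registers.  */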
21352 thumb_exit (FILE *f, int reg_containing_return_addr)
21354 unsigned regs_available_for_popping;
21355 unsigned regs_to_pop;
21357 unsigned available;
21361 int restore_a4 = FALSE;
21363 /* Compute the registers we need to pop. */
21367 if (reg_containing_return_addr == -1)
21369 regs_to_pop |= 1 << LR_REGNUM;
21373 if (TARGET_BACKTRACE)
21375 /* Restore the (ARM) frame pointer and stack pointer. */
21376 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21380 /* If there is nothing to pop then just emit the BX instruction and
21381 return. */
21382 if (pops_needed == 0)
21384 if (crtl->calls_eh_return)
21385 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21387 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21390 /* Otherwise, if we are not supporting interworking, have not created
21391 a backtrace structure, and the function was not entered in ARM mode,
21392 then just pop the return address straight into the PC. */
21393 else if (!TARGET_INTERWORK
21394 && !TARGET_BACKTRACE
21395 && !is_called_in_ARM_mode (current_function_decl)
21396 && !crtl->calls_eh_return)
21398 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21402 /* Find out how many of the (return) argument registers we can corrupt. */
21403 regs_available_for_popping = 0;
21405 /* If returning via __builtin_eh_return, the bottom three registers
21406 all contain information needed for the return. */
21407 if (crtl->calls_eh_return)
21411 /* Otherwise, deduce the registers used from the function's
21412 return value. This is more reliable than examining
21413 df_regs_ever_live_p () because that will be set if the register is
21414 ever used in the function, not just if the register is used
21415 to hold a return value. */
21417 if (crtl->return_rtx != 0)
21418 mode = GET_MODE (crtl->return_rtx);
21419 else
21420 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21422 size = GET_MODE_SIZE (mode);
21426 /* In a void function we can use any argument register.
21427 In a function that returns a structure on the stack
21428 we can use the second and third argument registers. */
21429 if (mode == VOIDmode)
21430 regs_available_for_popping =
21431 (1 << ARG_REGISTER (1))
21432 | (1 << ARG_REGISTER (2))
21433 | (1 << ARG_REGISTER (3));
21435 regs_available_for_popping =
21436 (1 << ARG_REGISTER (2))
21437 | (1 << ARG_REGISTER (3));
21439 else if (size <= 4)
21440 regs_available_for_popping =
21441 (1 << ARG_REGISTER (2))
21442 | (1 << ARG_REGISTER (3));
21443 else if (size <= 8)
21444 regs_available_for_popping =
21445 (1 << ARG_REGISTER (3));
21448 /* Match registers to be popped with registers into which we pop them. */
21449 for (available = regs_available_for_popping,
21450 required = regs_to_pop;
21451 required != 0 && available != 0;
21452 available &= ~(available & - available),
21453 required &= ~(required & - required))
21454 --pops_needed;
21456 /* If we have any popping registers left over, remove them. */
21458 regs_available_for_popping &= ~available;
21460 /* Otherwise if we need another popping register we can use
21461 the fourth argument register. */
21462 else if (pops_needed)
21464 /* If we have not found any free argument registers and
21465 reg a4 contains the return address, we must move it. */
21466 if (regs_available_for_popping == 0
21467 && reg_containing_return_addr == LAST_ARG_REGNUM)
21469 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21470 reg_containing_return_addr = LR_REGNUM;
21472 else if (size > 12)
21474 /* Register a4 is being used to hold part of the return value,
21475 but we have dire need of a free, low register. */
21478 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21481 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21483 /* The fourth argument register is available. */
21484 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21490 /* Pop as many registers as we can. */
21491 thumb_pop (f, regs_available_for_popping);
21493 /* Process the registers we popped. */
21494 if (reg_containing_return_addr == -1)
21496 /* The return address was popped into the lowest numbered register. */
21497 regs_to_pop &= ~(1 << LR_REGNUM);
21499 reg_containing_return_addr =
21500 number_of_first_bit_set (regs_available_for_popping);
21502 /* Remove this register from the mask of available registers, so that
21503 the return address will not be corrupted by further pops. */
21504 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21507 /* If we popped other registers then handle them here. */
21508 if (regs_available_for_popping)
21512 /* Work out which register currently contains the frame pointer. */
21513 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21515 /* Move it into the correct place. */
21516 asm_fprintf (f, "\tmov\t%r, %r\n",
21517 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21519 /* (Temporarily) remove it from the mask of popped registers. */
21520 regs_available_for_popping &= ~(1 << frame_pointer);
21521 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21523 if (regs_available_for_popping)
21527 /* We popped the stack pointer as well,
21528 find the register that contains it. */
21529 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21531 /* Move it into the stack register. */
21532 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21534 /* At this point we have popped all necessary registers, so
21535 do not worry about restoring regs_available_for_popping
21536 to its correct value:
21538 assert (pops_needed == 0)
21539 assert (regs_available_for_popping == (1 << frame_pointer))
21540 assert (regs_to_pop == (1 << STACK_POINTER)) */
21544 /* Since we have just moved the popped value into the frame
21545 pointer, the popping register is available for reuse, and
21546 we know that we still have the stack pointer left to pop. */
21547 regs_available_for_popping |= (1 << frame_pointer);
21551 /* If we still have registers left on the stack, but we no longer have
21552 any registers into which we can pop them, then we must move the return
21553 address into the link register and make available the register that
21554 contained it. */
21555 if (regs_available_for_popping == 0 && pops_needed > 0)
21557 regs_available_for_popping |= 1 << reg_containing_return_addr;
21559 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21560 reg_containing_return_addr);
21562 reg_containing_return_addr = LR_REGNUM;
21565 /* If we have registers left on the stack then pop some more.
21566 We know that at most we will want to pop FP and SP. */
21567 if (pops_needed > 0)
21572 thumb_pop (f, regs_available_for_popping);
21574 /* We have popped either FP or SP.
21575 Move whichever one it is into the correct register. */
21576 popped_into = number_of_first_bit_set (regs_available_for_popping);
21577 move_to = number_of_first_bit_set (regs_to_pop);
21579 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21581 regs_to_pop &= ~(1 << move_to);
21586 /* If we still have not popped everything then we must have only
21587 had one register available to us and we are now popping the SP. */
21588 if (pops_needed > 0)
21592 thumb_pop (f, regs_available_for_popping);
21594 popped_into = number_of_first_bit_set (regs_available_for_popping);
21596 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21597 /*
21598 assert (regs_to_pop == (1 << STACK_POINTER))
21599 assert (pops_needed == 1)
21600 */
21603 /* If necessary restore the a4 register. */
21606 if (reg_containing_return_addr != LR_REGNUM)
21608 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21609 reg_containing_return_addr = LR_REGNUM;
21612 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21615 if (crtl->calls_eh_return)
21616 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21618 /* Return to caller. */
21619 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21622 /* Scan INSN just before assembler is output for it.
21623 For Thumb-1, we track the status of the condition codes; this
21624 information is used in the cbranchsi4_insn pattern. */
21626 thumb1_final_prescan_insn (rtx insn)
21628 if (flag_print_asm_name)
21629 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21630 INSN_ADDRESSES (INSN_UID (insn)));
21631 /* Don't overwrite the previous setter when we get to a cbranch. */
21632 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21634 enum attr_conds conds;
21636 if (cfun->machine->thumb1_cc_insn)
21638 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21639 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21642 conds = get_attr_conds (insn);
21643 if (conds == CONDS_SET)
21645 rtx set = single_set (insn);
21646 cfun->machine->thumb1_cc_insn = insn;
21647 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21648 cfun->machine->thumb1_cc_op1 = const0_rtx;
21649 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21650 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21652 rtx src1 = XEXP (SET_SRC (set), 1);
21653 if (src1 == const0_rtx)
21654 cfun->machine->thumb1_cc_mode = CCmode;
21657 else if (conds != CONDS_NOCOND)
21658 cfun->machine->thumb1_cc_insn = NULL_RTX;
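/* Return nonzero if VAL (viewed as a 32-bit constant) is an 8-bit value
   shifted left by some amount, i.e. a constant that Thumb-1 can build
   with a move of an 8-bit immediate followed by a left shift.  */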
21663 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21665 unsigned HOST_WIDE_INT mask = 0xff;
21668 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21669 if (val == 0) /* XXX */
21672 for (i = 0; i < 25; i++)
21673 if ((val & (mask << i)) == val)
21674 return 1;
21676 return 0;
21677 }
21679 /* Returns nonzero if the current function contains,
21680 or might contain a far jump. */
21682 thumb_far_jump_used_p (void)
21686 /* This test is only important for leaf functions. */
21687 /* assert (!leaf_function_p ()); */
21689 /* If we have already decided that far jumps may be used,
21690 do not bother checking again, and always return true even if
21691 it turns out that they are not being used. Once we have made
21692 the decision that far jumps are present (and that hence the link
21693 register will be pushed onto the stack) we cannot go back on it. */
21694 if (cfun->machine->far_jump_used)
21697 /* If this function is not being called from the prologue/epilogue
21698 generation code then it must be being called from the
21699 INITIAL_ELIMINATION_OFFSET macro. */
21700 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21702 /* In this case we know that we are being asked about the elimination
21703 of the arg pointer register. If that register is not being used,
21704 then there are no arguments on the stack, and we do not have to
21705 worry that a far jump might force the prologue to push the link
21706 register, changing the stack offsets. In this case we can just
21707 return false, since the presence of far jumps in the function will
21708 not affect stack offsets.
21710 If the arg pointer is live (or if it was live, but has now been
21711 eliminated and so set to dead) then we do have to test to see if
21712 the function might contain a far jump. This test can lead to some
21713 false negatives, since before reload is completed, the length of
21714 branch instructions is not known, so gcc defaults to returning their
21715 longest length, which in turn sets the far jump attribute to true.
21717 A false positive will not result in bad code being generated, but it
21718 will result in a needless push and pop of the link register. We
21719 hope that this does not occur too often.
21721 If we need doubleword stack alignment this could affect the other
21722 elimination offsets so we can't risk getting it wrong. */
21723 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21724 cfun->machine->arg_pointer_live = 1;
21725 else if (!cfun->machine->arg_pointer_live)
21729 /* Check to see if the function contains a branch
21730 insn with the far jump attribute set. */
21731 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21733 if (GET_CODE (insn) == JUMP_INSN
21734 /* Ignore tablejump patterns. */
21735 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21736 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21737 && get_attr_far_jump (insn) == FAR_JUMP_YES
21740 /* Record the fact that we have decided that
21741 the function does use far jumps. */
21742 cfun->machine->far_jump_used = 1;
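/* For context (our reading of the Thumb-1 ISA, not stated in this
   file): a conditional branch reaches only about +/-256 bytes and an
   unconditional branch about +/-2KB, so a far jump must be
   synthesised with a BL pair, which clobbers LR -- hence the link
   register must be saved once far jumps are assumed present.  */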
21750 /* Return nonzero if FUNC must be entered in ARM mode. */
21752 is_called_in_ARM_mode (tree func)
21754 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21756 /* Ignore the problem of functions whose address is taken. */
21757 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21761 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21767 /* Given the stack offsets and register mask in OFFSETS, decide how
21768 many additional registers to push instead of subtracting a constant
21769 from SP. For epilogues the principle is the same except we use pop.
21770 FOR_PROLOGUE indicates which we're generating. */
21772 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21774 HOST_WIDE_INT amount;
21775 unsigned long live_regs_mask = offsets->saved_regs_mask;
21776 /* Extract a mask of the ones we can give to the Thumb's push/pop instructions. */
21778 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21779 /* Then count how many other high registers will need to be pushed. */
21780 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21781 int n_free, reg_base;
21783 if (!for_prologue && frame_pointer_needed)
21784 amount = offsets->locals_base - offsets->saved_regs;
21786 amount = offsets->outgoing_args - offsets->saved_regs;
21788 /* If the stack frame size is 512 exactly, we can save one load
21789 instruction, which should make this a win even when optimizing for speed. */
21791 if (!optimize_size && amount != 512)
21794 /* Can't do this if there are high registers to push. */
21795 if (high_regs_pushed != 0)
21798 /* Shouldn't do it in the prologue if no registers would normally
21799 be pushed at all. In the epilogue, also allow it if we'll have
21800 a pop insn for the PC. */
21803 || TARGET_BACKTRACE
21804 || (live_regs_mask & 1 << LR_REGNUM) == 0
21805 || TARGET_INTERWORK
21806 || crtl->args.pretend_args_size != 0))
21809 /* Don't do this if thumb_expand_prologue wants to emit instructions
21810 between the push and the stack frame allocation. */
21812 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21813 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21820 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21821 live_regs_mask >>= reg_base;
21824 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21825 && (for_prologue || call_used_regs[reg_base + n_free]))
21827 live_regs_mask >>= 1;
21833 gcc_assert (amount / 4 * 4 == amount);
21835 if (amount >= 512 && (amount - n_free * 4) < 512)
21836 return (amount - 508) / 4;
21837 if (amount <= n_free * 4)
21838 return amount / 4;
21840 return 0;
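/* Worked example (a sketch under the rules above): with a 4-byte
   return value reg_base is 1; if r1-r3 are free call-clobbered
   registers then n_free is 3.  A frame of 520 bytes satisfies
   520 >= 512 and 520 - 12 < 512, so (520 - 508) / 4 = 3 extra
   registers are pushed, shrinking the remaining adjustment to the
   508 bytes that a single immediate add/sub of SP can encode.  */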
21842 /* The bits which aren't usefully expanded as rtl. */
21844 thumb_unexpanded_epilogue (void)
21846 arm_stack_offsets *offsets;
21848 unsigned long live_regs_mask = 0;
21849 int high_regs_pushed = 0;
21851 int had_to_push_lr;
21854 if (cfun->machine->return_used_this_function != 0)
21857 if (IS_NAKED (arm_current_func_type ()))
21860 offsets = arm_get_frame_offsets ();
21861 live_regs_mask = offsets->saved_regs_mask;
21862 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21864 /* Deduce the registers used from the function's return value where possible.
21865 This is more reliable than examining df_regs_ever_live_p () because that
21866 will be set if the register is ever used in the function, not just if
21867 the register is used to hold a return value. */
21868 size = arm_size_return_regs ();
21870 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21873 unsigned long extra_mask = (1 << extra_pop) - 1;
21874 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21875 / UNITS_PER_WORD);
21878 /* The prolog may have pushed some high registers to use as
21879 work registers. e.g. the testsuite file:
21880 gcc/testsuite/gcc.c-torture/execute/complex-2.c
21881 compiles to produce:
21882 push {r4, r5, r6, r7, lr}
21883 mov r7, r9
21884 mov r6, r8
21885 push {r6, r7}
21886 as part of the prolog. We have to undo that pushing here. */
21888 if (high_regs_pushed)
21890 unsigned long mask = live_regs_mask & 0xff;
21893 /* The available low registers depend on the size of the value we are returning. */
21901 /* Oh dear! We have no low registers into which we can pop
21902 high registers! */
21903 internal_error
21904 ("no low registers available for popping high registers");
21906 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21907 if (live_regs_mask & (1 << next_hi_reg))
21910 while (high_regs_pushed)
21912 /* Find lo register(s) into which the high register(s) can be popped. */
21914 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21916 if (mask & (1 << regno))
21917 high_regs_pushed--;
21918 if (high_regs_pushed == 0)
21922 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21924 /* Pop the values into the low register(s). */
21925 thumb_pop (asm_out_file, mask);
21927 /* Move the value(s) into the high registers. */
21928 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21930 if (mask & (1 << regno))
21932 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21935 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21936 if (live_regs_mask & (1 << next_hi_reg))
21941 live_regs_mask &= ~0x0f00;
21944 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21945 live_regs_mask &= 0xff;
21947 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21949 /* Pop the return address into the PC. */
21950 if (had_to_push_lr)
21951 live_regs_mask |= 1 << PC_REGNUM;
21953 /* Either no argument registers were pushed or a backtrace
21954 structure was created which includes an adjusted stack
21955 pointer, so just pop everything. */
21956 if (live_regs_mask)
21957 thumb_pop (asm_out_file, live_regs_mask);
21959 /* We have either just popped the return address into the
21960 PC or it was kept in LR for the entire function.
21961 Note that thumb_pop has already called thumb_exit if the
21962 PC was in the list. */
21963 if (!had_to_push_lr)
21964 thumb_exit (asm_out_file, LR_REGNUM);
21968 /* Pop everything but the return address. */
21969 if (live_regs_mask)
21970 thumb_pop (asm_out_file, live_regs_mask);
21972 if (had_to_push_lr)
21976 /* We have no free low regs, so save one. */
21977 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21981 /* Get the return address into a temporary register. */
21982 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21986 /* Move the return address to lr. */
21987 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21989 /* Restore the low register. */
21990 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21995 regno = LAST_ARG_REGNUM;
22000 /* Remove the argument registers that were pushed onto the stack. */
22001 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22002 SP_REGNUM, SP_REGNUM,
22003 crtl->args.pretend_args_size);
22005 thumb_exit (asm_out_file, regno);
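/* Illustrative output (the registers are an assumption): a function
   that pushed {r4, lr} plus three anonymous argument words, with a
   return value small enough to leave r3 free, gets an epilogue such as:

	pop	{r4}		@ pop everything but the return address
	pop	{r3}		@ return address into a temporary
	add	sp, sp, #12	@ discard the pushed argument registers
	bx	r3		@ thumb_exit via r3
*/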
22011 /* Functions to save and restore machine-specific function data. */
22012 static struct machine_function *
22013 arm_init_machine_status (void)
22015 struct machine_function *machine;
22016 machine = ggc_alloc_cleared_machine_function ();
22018 #if ARM_FT_UNKNOWN != 0
22019 machine->func_type = ARM_FT_UNKNOWN;
22024 /* Return an RTX indicating where the return address to the
22025 calling function can be found. */
22027 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22032 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22035 /* Do anything needed before RTL is emitted for each function. */
22037 arm_init_expanders (void)
22039 /* Arrange to initialize and mark the machine per-function status. */
22040 init_machine_status = arm_init_machine_status;
22042 /* This is to stop the combine pass optimizing away the alignment
22043 adjustment of va_arg. */
22044 /* ??? It is claimed that this should not be necessary. */
22046 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22050 /* Like arm_compute_initial_elimination_offset. Simpler because there
22051 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22052 to point at the base of the local variables after static stack
22053 space for a function has been allocated. */
22056 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22058 arm_stack_offsets *offsets;
22060 offsets = arm_get_frame_offsets ();
22064 case ARG_POINTER_REGNUM:
22067 case STACK_POINTER_REGNUM:
22068 return offsets->outgoing_args - offsets->saved_args;
22070 case FRAME_POINTER_REGNUM:
22071 return offsets->soft_frame - offsets->saved_args;
22073 case ARM_HARD_FRAME_POINTER_REGNUM:
22074 return offsets->saved_regs - offsets->saved_args;
22076 case THUMB_HARD_FRAME_POINTER_REGNUM:
22077 return offsets->locals_base - offsets->saved_args;
22080 gcc_unreachable ();
22084 case FRAME_POINTER_REGNUM:
22087 case STACK_POINTER_REGNUM:
22088 return offsets->outgoing_args - offsets->soft_frame;
22090 case ARM_HARD_FRAME_POINTER_REGNUM:
22091 return offsets->saved_regs - offsets->soft_frame;
22093 case THUMB_HARD_FRAME_POINTER_REGNUM:
22094 return offsets->locals_base - offsets->soft_frame;
22097 gcc_unreachable ();
22102 gcc_unreachable ();
22106 /* Generate the function's prologue. */
22109 thumb1_expand_prologue (void)
22113 HOST_WIDE_INT amount;
22114 arm_stack_offsets *offsets;
22115 unsigned long func_type;
22117 unsigned long live_regs_mask;
22118 unsigned long l_mask;
22119 unsigned high_regs_pushed = 0;
22121 func_type = arm_current_func_type ();
22123 /* Naked functions don't have prologues. */
22124 if (IS_NAKED (func_type))
22127 if (IS_INTERRUPT (func_type))
22129 error ("interrupt Service Routines cannot be coded in Thumb mode");
22133 if (is_called_in_ARM_mode (current_function_decl))
22134 emit_insn (gen_prologue_thumb1_interwork ());
22136 offsets = arm_get_frame_offsets ();
22137 live_regs_mask = offsets->saved_regs_mask;
22139 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22140 l_mask = live_regs_mask & 0x40ff;
22141 /* Then count how many other high registers will need to be pushed. */
22142 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22144 if (crtl->args.pretend_args_size)
22146 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22148 if (cfun->machine->uses_anonymous_args)
22150 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22151 unsigned long mask;
22153 mask = 1ul << (LAST_ARG_REGNUM + 1);
22154 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22156 insn = thumb1_emit_multi_reg_push (mask, 0);
22160 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22161 stack_pointer_rtx, x));
22163 RTX_FRAME_RELATED_P (insn) = 1;
22166 if (TARGET_BACKTRACE)
22168 HOST_WIDE_INT offset = 0;
22169 unsigned work_register;
22170 rtx work_reg, x, arm_hfp_rtx;
22172 /* We have been asked to create a stack backtrace structure.
22173 The code looks like this:
22177 0 sub SP, #16 Reserve space for 4 registers.
22178 2 push {R7} Push low registers.
22179 4 add R7, SP, #20 Get the stack pointer before the push.
22180 6 str R7, [SP, #8] Store the stack pointer
22181 (before reserving the space).
22182 8 mov R7, PC Get hold of the start of this code + 12.
22183 10 str R7, [SP, #16] Store it.
22184 12 mov R7, FP Get hold of the current frame pointer.
22185 14 str R7, [SP, #4] Store it.
22186 16 mov R7, LR Get hold of the current return address.
22187 18 str R7, [SP, #12] Store it.
22188 20 add R7, SP, #16 Point at the start of the
22189 backtrace structure.
22190 22 mov FP, R7 Put this value into the frame pointer. */
22192 work_register = thumb_find_work_register (live_regs_mask);
22193 work_reg = gen_rtx_REG (SImode, work_register);
22194 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22196 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22197 stack_pointer_rtx, GEN_INT (-16)));
22198 RTX_FRAME_RELATED_P (insn) = 1;
22202 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22203 RTX_FRAME_RELATED_P (insn) = 1;
22205 offset = bit_count (l_mask) * UNITS_PER_WORD;
22208 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22209 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22211 x = plus_constant (stack_pointer_rtx, offset + 4);
22212 x = gen_frame_mem (SImode, x);
22213 emit_move_insn (x, work_reg);
22215 /* Make sure that the instruction fetching the PC is in the right place
22216 to calculate "start of backtrace creation code + 12". */
22217 /* ??? The stores using the common WORK_REG ought to be enough to
22218 prevent the scheduler from doing anything weird. Failing that
22219 we could always move all of the following into an UNSPEC_VOLATILE. */
22222 x = gen_rtx_REG (SImode, PC_REGNUM);
22223 emit_move_insn (work_reg, x);
22225 x = plus_constant (stack_pointer_rtx, offset + 12);
22226 x = gen_frame_mem (SImode, x);
22227 emit_move_insn (x, work_reg);
22229 emit_move_insn (work_reg, arm_hfp_rtx);
22231 x = plus_constant (stack_pointer_rtx, offset);
22232 x = gen_frame_mem (SImode, x);
22233 emit_move_insn (x, work_reg);
22237 emit_move_insn (work_reg, arm_hfp_rtx);
22239 x = plus_constant (stack_pointer_rtx, offset);
22240 x = gen_frame_mem (SImode, x);
22241 emit_move_insn (x, work_reg);
22243 x = gen_rtx_REG (SImode, PC_REGNUM);
22244 emit_move_insn (work_reg, x);
22246 x = plus_constant (stack_pointer_rtx, offset + 12);
22247 x = gen_frame_mem (SImode, x);
22248 emit_move_insn (x, work_reg);
22251 x = gen_rtx_REG (SImode, LR_REGNUM);
22252 emit_move_insn (work_reg, x);
22254 x = plus_constant (stack_pointer_rtx, offset + 8);
22255 x = gen_frame_mem (SImode, x);
22256 emit_move_insn (x, work_reg);
22258 x = GEN_INT (offset + 12);
22259 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22261 emit_move_insn (arm_hfp_rtx, work_reg);
22263 /* Optimization: If we are not pushing any low registers but we are going
22264 to push some high registers then delay our first push. This will just
22265 be a push of LR and we can combine it with the push of the first high register. */
22267 else if ((l_mask & 0xff) != 0
22268 || (high_regs_pushed == 0 && l_mask))
22270 unsigned long mask = l_mask;
22271 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22272 insn = thumb1_emit_multi_reg_push (mask, mask);
22273 RTX_FRAME_RELATED_P (insn) = 1;
22276 if (high_regs_pushed)
22278 unsigned pushable_regs;
22279 unsigned next_hi_reg;
22281 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22282 if (live_regs_mask & (1 << next_hi_reg))
22285 pushable_regs = l_mask & 0xff;
22287 if (pushable_regs == 0)
22288 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22290 while (high_regs_pushed > 0)
22292 unsigned long real_regs_mask = 0;
22294 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22296 if (pushable_regs & (1 << regno))
22298 emit_move_insn (gen_rtx_REG (SImode, regno),
22299 gen_rtx_REG (SImode, next_hi_reg));
22301 high_regs_pushed --;
22302 real_regs_mask |= (1 << next_hi_reg);
22304 if (high_regs_pushed)
22306 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22308 if (live_regs_mask & (1 << next_hi_reg))
22313 pushable_regs &= ~((1 << regno) - 1);
22319 /* If we had to find a work register and we have not yet
22320 saved the LR then add it to the list of regs to push. */
22321 if (l_mask == (1 << LR_REGNUM))
22323 pushable_regs |= l_mask;
22324 real_regs_mask |= l_mask;
22328 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22329 RTX_FRAME_RELATED_P (insn) = 1;
22333 /* Load the pic register before setting the frame pointer,
22334 so we can use r7 as a temporary work register. */
22335 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22336 arm_load_pic_register (live_regs_mask);
22338 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22339 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22340 stack_pointer_rtx);
22342 if (flag_stack_usage_info)
22343 current_function_static_stack_size
22344 = offsets->outgoing_args - offsets->saved_args;
22346 amount = offsets->outgoing_args - offsets->saved_regs;
22347 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22352 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22353 GEN_INT (- amount)));
22354 RTX_FRAME_RELATED_P (insn) = 1;
22360 /* The stack decrement is too big for an immediate value in a single
22361 insn. In theory we could issue multiple subtracts, but after
22362 three of them it becomes more space efficient to place the full
22363 value in the constant pool and load into a register. (Also the
22364 ARM debugger really likes to see only one stack decrement per
22365 function). So instead we look for a scratch register into which
22366 we can load the decrement, and then we subtract this from the
22367 stack pointer. Unfortunately on the thumb the only available
22368 scratch registers are the argument registers, and we cannot use
22369 these as they may hold arguments to the function. Instead we
22370 attempt to locate a call preserved register which is used by this
22371 function. If we can find one, then we know that it will have
22372 been pushed at the start of the prologue and so we can corrupt it now. */
22374 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22375 if (live_regs_mask & (1 << regno))
22378 gcc_assert(regno <= LAST_LO_REGNUM);
22380 reg = gen_rtx_REG (SImode, regno);
22382 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22384 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22385 stack_pointer_rtx, reg));
22387 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22388 plus_constant (stack_pointer_rtx,
22390 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22391 RTX_FRAME_RELATED_P (insn) = 1;
22395 if (frame_pointer_needed)
22396 thumb_set_frame_pointer (offsets);
22398 /* If we are profiling, make sure no instructions are scheduled before
22399 the call to mcount. Similarly if the user has requested no
22400 scheduling in the prolog. Similarly if we want non-call exceptions
22401 using the EABI unwinder, to prevent faulting instructions from being
22402 swapped with a stack adjustment. */
22403 if (crtl->profile || !TARGET_SCHED_PROLOG
22404 || (arm_except_unwind_info (&global_options) == UI_TARGET
22405 && cfun->can_throw_non_call_exceptions))
22406 emit_insn (gen_blockage ());
22408 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22409 if (live_regs_mask & 0xff)
22410 cfun->machine->lr_save_eliminated = 0;
22415 thumb1_expand_epilogue (void)
22417 HOST_WIDE_INT amount;
22418 arm_stack_offsets *offsets;
22421 /* Naked functions don't have epilogues. */
22422 if (IS_NAKED (arm_current_func_type ()))
22425 offsets = arm_get_frame_offsets ();
22426 amount = offsets->outgoing_args - offsets->saved_regs;
22428 if (frame_pointer_needed)
22430 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22431 amount = offsets->locals_base - offsets->saved_regs;
22433 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22435 gcc_assert (amount >= 0);
22438 emit_insn (gen_blockage ());
22441 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22442 GEN_INT (amount)));
22445 /* r3 is always free in the epilogue. */
22446 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22448 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22449 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22453 /* Emit a USE (stack_pointer_rtx), so that
22454 the stack adjustment will not be deleted. */
22455 emit_insn (gen_prologue_use (stack_pointer_rtx));
22457 if (crtl->profile || !TARGET_SCHED_PROLOG)
22458 emit_insn (gen_blockage ());
22460 /* Emit a clobber for each insn that will be restored in the epilogue,
22461 so that flow2 will get register lifetimes correct. */
22462 for (regno = 0; regno < 13; regno++)
22463 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22464 emit_clobber (gen_rtx_REG (SImode, regno));
22466 if (! df_regs_ever_live_p (LR_REGNUM))
22467 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22470 /* Implementation of insn prologue_thumb1_interwork. This is the first
22471 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22474 thumb1_output_interwork (void)
22477 FILE *f = asm_out_file;
22479 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22480 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22481 == SYMBOL_REF);
22482 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22484 /* Generate code sequence to switch us into Thumb mode. */
22485 /* The .code 32 directive has already been emitted by
22486 ASM_DECLARE_FUNCTION_NAME. */
22487 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22488 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22490 /* Generate a label, so that the debugger will notice the
22491 change in instruction sets. This label is also used by
22492 the assembler to bypass the ARM code when this function
22493 is called from a Thumb encoded function elsewhere in the
22494 same file. Hence the definition of STUB_NAME here must
22495 agree with the definition in gas/config/tc-arm.c. */
22497 #define STUB_NAME ".real_start_of"
22499 fprintf (f, "\t.code\t16\n");
22501 if (arm_dllexport_name_p (name))
22502 name = arm_strip_name_encoding (name);
22504 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22505 fprintf (f, "\t.thumb_func\n");
22506 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22511 /* Handle the case of a double word load into a low register from
22512 a computed memory address. The computed address may involve a
22513 register which is overwritten by the load. */
22515 thumb_load_double_from_address (rtx *operands)
22523 gcc_assert (GET_CODE (operands[0]) == REG);
22524 gcc_assert (GET_CODE (operands[1]) == MEM);
22526 /* Get the memory address. */
22527 addr = XEXP (operands[1], 0);
22529 /* Work out how the memory address is computed. */
22530 switch (GET_CODE (addr))
22533 operands[2] = adjust_address (operands[1], SImode, 4);
22535 if (REGNO (operands[0]) == REGNO (addr))
22537 output_asm_insn ("ldr\t%H0, %2", operands);
22538 output_asm_insn ("ldr\t%0, %1", operands);
22542 output_asm_insn ("ldr\t%0, %1", operands);
22543 output_asm_insn ("ldr\t%H0, %2", operands);
22548 /* Compute <address> + 4 for the high order load. */
22549 operands[2] = adjust_address (operands[1], SImode, 4);
22551 output_asm_insn ("ldr\t%0, %1", operands);
22552 output_asm_insn ("ldr\t%H0, %2", operands);
22556 arg1 = XEXP (addr, 0);
22557 arg2 = XEXP (addr, 1);
22559 if (CONSTANT_P (arg1))
22560 base = arg2, offset = arg1;
22562 base = arg1, offset = arg2;
22564 gcc_assert (GET_CODE (base) == REG);
22566 /* Catch the case of <address> = <reg> + <reg> */
22567 if (GET_CODE (offset) == REG)
22569 int reg_offset = REGNO (offset);
22570 int reg_base = REGNO (base);
22571 int reg_dest = REGNO (operands[0]);
22573 /* Add the base and offset registers together into the
22574 higher destination register. */
22575 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22576 reg_dest + 1, reg_base, reg_offset);
22578 /* Load the lower destination register from the address in
22579 the higher destination register. */
22580 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22581 reg_dest, reg_dest + 1);
22583 /* Load the higher destination register from its own address plus 4. */
22585 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22586 reg_dest + 1, reg_dest + 1);
22590 /* Compute <address> + 4 for the high order load. */
22591 operands[2] = adjust_address (operands[1], SImode, 4);
22593 /* If the computed address is held in the low order register
22594 then load the high order register first, otherwise always
22595 load the low order register first. */
22596 if (REGNO (operands[0]) == REGNO (base))
22598 output_asm_insn ("ldr\t%H0, %2", operands);
22599 output_asm_insn ("ldr\t%0, %1", operands);
22603 output_asm_insn ("ldr\t%0, %1", operands);
22604 output_asm_insn ("ldr\t%H0, %2", operands);
22610 /* With no registers to worry about we can just load the value directly. */
22612 operands[2] = adjust_address (operands[1], SImode, 4);
22614 output_asm_insn ("ldr\t%H0, %2", operands);
22615 output_asm_insn ("ldr\t%0, %1", operands);
22619 gcc_unreachable ();
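/* Illustrative expansion (hand-picked registers): loading a DImode
   value into r0/r1 from [r0 + r2] hits the overlap case above and
   assembles to:

	add	r1, r0, r2	@ form the address in the high half
	ldr	r0, [r1, #0]	@ low word first
	ldr	r1, [r1, #4]	@ high word from its own address
*/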
22626 thumb_output_move_mem_multiple (int n, rtx *operands)
22633 if (REGNO (operands[4]) > REGNO (operands[5]))
22636 operands[4] = operands[5];
22639 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22640 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22644 if (REGNO (operands[4]) > REGNO (operands[5]))
22647 operands[4] = operands[5];
22650 if (REGNO (operands[5]) > REGNO (operands[6]))
22653 operands[5] = operands[6];
22656 if (REGNO (operands[4]) > REGNO (operands[5]))
22659 operands[4] = operands[5];
22663 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22664 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22668 gcc_unreachable ();
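/* Example (illustrative): a 12-byte copy with scratch registers r2,
   r3 and r4 assembles to:

	ldmia	r1!, {r2, r3, r4}
	stmia	r0!, {r2, r3, r4}

   The operand sorting above keeps the register lists in ascending
   order, as the ldmia/stmia encodings require.  */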
22674 /* Output a call-via instruction for thumb state. */
22676 thumb_call_via_reg (rtx reg)
22678 int regno = REGNO (reg);
22681 gcc_assert (regno < LR_REGNUM);
22683 /* If we are in the normal text section we can use a single instance
22684 per compilation unit. If we are doing function sections, then we need
22685 an entry per section, since we can't rely on reachability. */
22686 if (in_section == text_section)
22688 thumb_call_reg_needed = 1;
22690 if (thumb_call_via_label[regno] == NULL)
22691 thumb_call_via_label[regno] = gen_label_rtx ();
22692 labelp = thumb_call_via_label + regno;
22696 if (cfun->machine->call_via[regno] == NULL)
22697 cfun->machine->call_via[regno] = gen_label_rtx ();
22698 labelp = cfun->machine->call_via + regno;
22701 output_asm_insn ("bl\t%a0", labelp);
22705 /* Routines for generating rtl. */
22707 thumb_expand_movmemqi (rtx *operands)
22709 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22710 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22711 HOST_WIDE_INT len = INTVAL (operands[2]);
22712 HOST_WIDE_INT offset = 0;
22716 emit_insn (gen_movmem12b (out, in, out, in));
22722 emit_insn (gen_movmem8b (out, in, out, in));
22728 rtx reg = gen_reg_rtx (SImode);
22729 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22730 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22737 rtx reg = gen_reg_rtx (HImode);
22738 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22739 plus_constant (in, offset))));
22740 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22741 reg));
22748 rtx reg = gen_reg_rtx (QImode);
22749 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22750 plus_constant (in, offset))));
22751 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22752 reg));
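/* Worked example: a 7-byte block copy expands to one SImode move,
   one HImode move and one QImode move (4 + 2 + 1 bytes), in that
   order, each through a fresh scratch register.  */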
22757 thumb_reload_out_hi (rtx *operands)
22759 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22762 /* Handle reading a half-word from memory during reload. */
22764 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22766 gcc_unreachable ();
22769 /* Return the length of a function name prefix
22770 that starts with the character 'c'. */
22772 arm_get_strip_length (int c)
22776 ARM_NAME_ENCODING_LENGTHS
22781 /* Return a pointer to a function's name with any
22782 and all prefix encodings stripped from it. */
22784 arm_strip_name_encoding (const char *name)
22788 while ((skip = arm_get_strip_length (* name)))
22794 /* If there is a '*' anywhere in the name's prefix, then
22795 emit the stripped name verbatim, otherwise prepend an
22796 underscore if leading underscores are being used. */
22798 arm_asm_output_labelref (FILE *stream, const char *name)
22803 while ((skip = arm_get_strip_length (* name)))
22805 verbatim |= (*name == '*');
22810 fputs (name, stream);
22812 asm_fprintf (stream, "%U%s", name);
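/* Worked example (the prefix letters registered in
   ARM_NAME_ENCODING_LENGTHS vary by target): "*foo" is emitted
   verbatim as "foo", while a plain "foo" goes through %U and picks
   up the user label prefix, e.g. "_foo".  */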
22816 arm_file_start (void)
22820 if (TARGET_UNIFIED_ASM)
22821 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22825 const char *fpu_name;
22826 if (arm_selected_arch)
22827 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22828 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22829 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22831 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22833 if (TARGET_SOFT_FLOAT)
22836 fpu_name = "softvfp";
22838 fpu_name = "softfpa";
22842 fpu_name = arm_fpu_desc->name;
22843 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22845 if (TARGET_HARD_FLOAT)
22846 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22847 if (TARGET_HARD_FLOAT_ABI)
22848 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22851 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22853 /* Some of these attributes only apply when the corresponding features
22854 are used. However we don't have any easy way of figuring this out.
22855 Conservatively record the setting that would have been used. */
22857 if (flag_rounding_math)
22858 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22860 if (!flag_unsafe_math_optimizations)
22862 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22863 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22865 if (flag_signaling_nans)
22866 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22868 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22869 flag_finite_math_only ? 1 : 3);
22871 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22872 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22873 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22875 /* Tag_ABI_optimization_goals. */
22878 else if (optimize >= 2)
22884 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22886 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22888 if (arm_fp16_format)
22889 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22891 if (arm_lang_output_object_attributes_hook)
22892 arm_lang_output_object_attributes_hook();
22895 default_file_start ();
22899 arm_file_end (void)
22903 if (NEED_INDICATE_EXEC_STACK)
22904 /* Add .note.GNU-stack. */
22905 file_end_indicate_exec_stack ();
22907 if (! thumb_call_reg_needed)
22910 switch_to_section (text_section);
22911 asm_fprintf (asm_out_file, "\t.code 16\n");
22912 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22914 for (regno = 0; regno < LR_REGNUM; regno++)
22916 rtx label = thumb_call_via_label[regno];
22920 targetm.asm_out.internal_label (asm_out_file, "L",
22921 CODE_LABEL_NUMBER (label));
22922 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22928 /* Symbols in the text segment can be accessed without indirecting via the
22929 constant pool; it may take an extra binary operation, but this is still
22930 faster than indirecting via memory. Don't do this when not optimizing,
22931 since we won't be calculating all of the offsets necessary to do this
22935 arm_encode_section_info (tree decl, rtx rtl, int first)
22937 if (optimize > 0 && TREE_CONSTANT (decl))
22938 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22940 default_encode_section_info (decl, rtl, first);
22942 #endif /* !ARM_PE */
22945 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22947 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22948 && !strcmp (prefix, "L"))
22950 arm_ccfsm_state = 0;
22951 arm_target_insn = NULL;
22953 default_internal_label (stream, prefix, labelno);
22956 /* Output code to add DELTA to the first argument, and then jump
22957 to FUNCTION. Used for C++ multiple inheritance. */
22959 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22960 HOST_WIDE_INT delta,
22961 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22964 static int thunk_label = 0;
22967 int mi_delta = delta;
22968 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22970 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22971 ? 1 : 0);
22973 mi_delta = - mi_delta;
22977 int labelno = thunk_label++;
22978 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22979 /* Thunks are entered in ARM mode when available. */
22980 if (TARGET_THUMB1_ONLY)
22982 /* push r3 so we can use it as a temporary. */
22983 /* TODO: Omit this save if r3 is not used. */
22984 fputs ("\tpush {r3}\n", file);
22985 fputs ("\tldr\tr3, ", file);
22989 fputs ("\tldr\tr12, ", file);
22991 assemble_name (file, label);
22992 fputc ('\n', file);
22995 /* If we are generating PIC, the ldr instruction below loads
22996 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22997 the address of the add + 8, so we have:
22999 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23000 = target + 1.
23002 Note that we have "+ 1" because some versions of GNU ld
23003 don't set the low bit of the result for R_ARM_REL32
23004 relocations against thumb function symbols.
23005 On ARMv6M this is +4, not +8. */
23006 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23007 assemble_name (file, labelpc);
23008 fputs (":\n", file);
23009 if (TARGET_THUMB1_ONLY)
23011 /* This is 2 insns after the start of the thunk, so we know it
23012 is 4-byte aligned. */
23013 fputs ("\tadd\tr3, pc, r3\n", file);
23014 fputs ("\tmov r12, r3\n", file);
23017 fputs ("\tadd\tr12, pc, r12\n", file);
23019 else if (TARGET_THUMB1_ONLY)
23020 fputs ("\tmov r12, r3\n", file);
23022 if (TARGET_THUMB1_ONLY)
23024 if (mi_delta > 255)
23026 fputs ("\tldr\tr3, ", file);
23027 assemble_name (file, label);
23028 fputs ("+4\n", file);
23029 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23030 mi_op, this_regno, this_regno);
23032 else if (mi_delta != 0)
23034 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23035 mi_op, this_regno, this_regno,
23041 /* TODO: Use movw/movt for large constants when available. */
23042 while (mi_delta != 0)
23044 if ((mi_delta & (3 << shift)) == 0)
23048 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23049 mi_op, this_regno, this_regno,
23050 mi_delta & (0xff << shift));
23051 mi_delta &= ~(0xff << shift);
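/* Worked example (our arithmetic): a delta of 0x10004 is emitted as
   two instructions, "add rN, rN, #4" then "add rN, rN, #65536",
   since each ARM immediate must be an 8-bit value at an even
   rotation.  */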
23058 if (TARGET_THUMB1_ONLY)
23059 fputs ("\tpop\t{r3}\n", file);
23061 fprintf (file, "\tbx\tr12\n");
23062 ASM_OUTPUT_ALIGN (file, 2);
23063 assemble_name (file, label);
23064 fputs (":\n", file);
23067 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23068 rtx tem = XEXP (DECL_RTL (function), 0);
23069 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23070 tem = gen_rtx_MINUS (GET_MODE (tem),
23072 gen_rtx_SYMBOL_REF (Pmode,
23073 ggc_strdup (labelpc)));
23074 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23077 /* Output ".word .LTHUNKn". */
23078 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23080 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23081 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23085 fputs ("\tb\t", file);
23086 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23087 if (NEED_PLT_RELOC)
23088 fputs ("(PLT)", file);
23089 fputc ('\n', file);
23094 arm_emit_vector_const (FILE *file, rtx x)
23097 const char * pattern;
23099 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23101 switch (GET_MODE (x))
23103 case V2SImode: pattern = "%08x"; break;
23104 case V4HImode: pattern = "%04x"; break;
23105 case V8QImode: pattern = "%02x"; break;
23106 default: gcc_unreachable ();
23109 fprintf (file, "0x");
23110 for (i = CONST_VECTOR_NUNITS (x); i--;)
23114 element = CONST_VECTOR_ELT (x, i);
23115 fprintf (file, pattern, INTVAL (element));
23121 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23122 HFmode constant pool entries are actually loaded with ldr. */
23124 arm_emit_fp16_const (rtx c)
23129 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23130 bits = real_to_target (NULL, &r, HFmode);
23131 if (WORDS_BIG_ENDIAN)
23132 assemble_zeros (2);
23133 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23134 if (!WORDS_BIG_ENDIAN)
23135 assemble_zeros (2);
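/* Example (our arithmetic, not from this file): the HFmode constant
   1.5 has the IEEE half-precision pattern 0x3e00, so a little-endian
   pool entry is emitted as the 16-bit value 0x3e00 followed by two
   bytes of zero padding, filling the 4-byte word that ldr loads.  */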
23139 arm_output_load_gr (rtx *operands)
23146 if (GET_CODE (operands [1]) != MEM
23147 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23148 || GET_CODE (reg = XEXP (sum, 0)) != REG
23149 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23150 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23151 return "wldrw%?\t%0, %1";
23153 /* Fix up an out-of-range load of a GR register. */
23154 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23155 wcgr = operands[0];
23157 output_asm_insn ("ldr%?\t%0, %1", operands);
23159 operands[0] = wcgr;
23161 output_asm_insn ("tmcr%?\t%0, %1", operands);
23162 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23167 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23169 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23170 named arg and all anonymous args onto the stack.
23171 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
23175 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23176 enum machine_mode mode,
23179 int second_time ATTRIBUTE_UNUSED)
23181 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23184 cfun->machine->uses_anonymous_args = 1;
23185 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23187 nregs = pcum->aapcs_ncrn;
23188 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23192 nregs = pcum->nregs;
23194 if (nregs < NUM_ARG_REGS)
23195 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
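/* Worked example: for "int f (int a, ...)" a single core register is
   consumed by the named argument, so nregs is 1 and *pretend_size
   becomes (4 - 1) * 4 = 12, i.e. r1-r3 are spilled contiguously with
   any stack-passed anonymous arguments.  */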
23198 /* Return nonzero if the CONSUMER instruction (a store) does not need
23199 PRODUCER's value to calculate the address. */
23202 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23204 rtx value = PATTERN (producer);
23205 rtx addr = PATTERN (consumer);
23207 if (GET_CODE (value) == COND_EXEC)
23208 value = COND_EXEC_CODE (value);
23209 if (GET_CODE (value) == PARALLEL)
23210 value = XVECEXP (value, 0, 0);
23211 value = XEXP (value, 0);
23212 if (GET_CODE (addr) == COND_EXEC)
23213 addr = COND_EXEC_CODE (addr);
23214 if (GET_CODE (addr) == PARALLEL)
23215 addr = XVECEXP (addr, 0, 0);
23216 addr = XEXP (addr, 0);
23218 return !reg_overlap_mentioned_p (value, addr);
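/* Example (hand-written RTL): with

	producer:  (set (reg:SI r1) ...)
	consumer:  (set (mem:SI (plus:SI (reg:SI r1) (const_int 4)))
	                (reg:SI r2))

   the store's address uses r1, so this returns zero; storing r1
   itself through an unrelated address would return nonzero, since
   only the address calculation matters here.  */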
23221 /* Return nonzero if the CONSUMER instruction (a store) does need
23222 PRODUCER's value to calculate the address. */
23225 arm_early_store_addr_dep (rtx producer, rtx consumer)
23227 return !arm_no_early_store_addr_dep (producer, consumer);
23230 /* Return nonzero if the CONSUMER instruction (a load) does need
23231 PRODUCER's value to calculate the address. */
23234 arm_early_load_addr_dep (rtx producer, rtx consumer)
23236 rtx value = PATTERN (producer);
23237 rtx addr = PATTERN (consumer);
23239 if (GET_CODE (value) == COND_EXEC)
23240 value = COND_EXEC_CODE (value);
23241 if (GET_CODE (value) == PARALLEL)
23242 value = XVECEXP (value, 0, 0);
23243 value = XEXP (value, 0);
23244 if (GET_CODE (addr) == COND_EXEC)
23245 addr = COND_EXEC_CODE (addr);
23246 if (GET_CODE (addr) == PARALLEL)
23247 addr = XVECEXP (addr, 0, 0);
23248 addr = XEXP (addr, 1);
23250 return reg_overlap_mentioned_p (value, addr);
23253 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23254 have an early register shift value or amount dependency on the
23255 result of PRODUCER. */
23258 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23260 rtx value = PATTERN (producer);
23261 rtx op = PATTERN (consumer);
23264 if (GET_CODE (value) == COND_EXEC)
23265 value = COND_EXEC_CODE (value);
23266 if (GET_CODE (value) == PARALLEL)
23267 value = XVECEXP (value, 0, 0);
23268 value = XEXP (value, 0);
23269 if (GET_CODE (op) == COND_EXEC)
23270 op = COND_EXEC_CODE (op);
23271 if (GET_CODE (op) == PARALLEL)
23272 op = XVECEXP (op, 0, 0);
23275 early_op = XEXP (op, 0);
23276 /* This is either an actual independent shift, or a shift applied to
23277 the first operand of another operation. We want the whole shift operation. */
23279 if (GET_CODE (early_op) == REG)
23282 return !reg_overlap_mentioned_p (value, early_op);
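/* Example (illustrative): for a producer that sets r2 and a consumer

	add	r0, r1, r2, lsl #2

   the early operand is the whole (ashift (reg r2) (const_int 2)), so
   the producer's result is needed early in the shifter and this
   returns zero.  */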
23285 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23286 have an early register shift value dependency on the result of PRODUCER. */
23290 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23292 rtx value = PATTERN (producer);
23293 rtx op = PATTERN (consumer);
23296 if (GET_CODE (value) == COND_EXEC)
23297 value = COND_EXEC_CODE (value);
23298 if (GET_CODE (value) == PARALLEL)
23299 value = XVECEXP (value, 0, 0);
23300 value = XEXP (value, 0);
23301 if (GET_CODE (op) == COND_EXEC)
23302 op = COND_EXEC_CODE (op);
23303 if (GET_CODE (op) == PARALLEL)
23304 op = XVECEXP (op, 0, 0);
23307 early_op = XEXP (op, 0);
23309 /* This is either an actual independent shift, or a shift applied to
23310 the first operand of another operation. We want the value being
23311 shifted, in either case. */
23312 if (GET_CODE (early_op) != REG)
23313 early_op = XEXP (early_op, 0);
23315 return !reg_overlap_mentioned_p (value, early_op);
23318 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23319 have an early register mult dependency on the result of PRODUCER. */
23323 arm_no_early_mul_dep (rtx producer, rtx consumer)
23325 rtx value = PATTERN (producer);
23326 rtx op = PATTERN (consumer);
23328 if (GET_CODE (value) == COND_EXEC)
23329 value = COND_EXEC_CODE (value);
23330 if (GET_CODE (value) == PARALLEL)
23331 value = XVECEXP (value, 0, 0);
23332 value = XEXP (value, 0);
23333 if (GET_CODE (op) == COND_EXEC)
23334 op = COND_EXEC_CODE (op);
23335 if (GET_CODE (op) == PARALLEL)
23336 op = XVECEXP (op, 0, 0);
23339 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23341 if (GET_CODE (XEXP (op, 0)) == MULT)
23342 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23344 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23350 /* We can't rely on the caller doing the proper promotion when
23351 using APCS or ATPCS. */
23354 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23356 return !TARGET_AAPCS_BASED;
23359 static enum machine_mode
23360 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23361 enum machine_mode mode,
23362 int *punsignedp ATTRIBUTE_UNUSED,
23363 const_tree fntype ATTRIBUTE_UNUSED,
23364 int for_return ATTRIBUTE_UNUSED)
23366 if (GET_MODE_CLASS (mode) == MODE_INT
23367 && GET_MODE_SIZE (mode) < 4)
23373 /* AAPCS based ABIs use short enums by default. */
23376 arm_default_short_enums (void)
23378 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23382 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23385 arm_align_anon_bitfield (void)
23387 return TARGET_AAPCS_BASED;
23391 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23394 arm_cxx_guard_type (void)
23396 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23399 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23400 has an accumulator dependency on the result of the producer (a
23401 multiplication instruction) and no other dependency on that result. */
23403 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23405 rtx mul = PATTERN (producer);
23406 rtx mac = PATTERN (consumer);
23408 rtx mac_op0, mac_op1, mac_acc;
23410 if (GET_CODE (mul) == COND_EXEC)
23411 mul = COND_EXEC_CODE (mul);
23412 if (GET_CODE (mac) == COND_EXEC)
23413 mac = COND_EXEC_CODE (mac);
23415 /* Check that mul is of the form (set (...) (mult ...))
23416 and mla is of the form (set (...) (plus (mult ...) (...))). */
23417 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23418 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23419 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23422 mul_result = XEXP (mul, 0);
23423 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23424 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23425 mac_acc = XEXP (XEXP (mac, 1), 1);
23427 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23428 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23429 && !reg_overlap_mentioned_p (mul_result, mac_op1));
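/* Example (illustrative): the pair

	mul	r0, r1, r2
	mla	r3, r4, r5, r0

   returns true: r0 is the mla's accumulator and appears in neither
   multiplicand.  "mla r3, r0, r5, r0" would return false, because
   the product itself also depends on r0.  */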
23433 /* The EABI says test the least significant bit of a guard variable. */
23436 arm_cxx_guard_mask_bit (void)
23438 return TARGET_AAPCS_BASED;
23442 /* The EABI specifies that all array cookies are 8 bytes long. */
23445 arm_get_cookie_size (tree type)
23449 if (!TARGET_AAPCS_BASED)
23450 return default_cxx_get_cookie_size (type);
23452 size = build_int_cst (sizetype, 8);
23457 /* The EABI says that array cookies should also contain the element size. */
23460 arm_cookie_has_size (void)
23462 return TARGET_AAPCS_BASED;
23466 /* The EABI says constructors and destructors should return a pointer to
23467 the object constructed/destroyed. */
23470 arm_cxx_cdtor_returns_this (void)
23472 return TARGET_AAPCS_BASED;
23475 /* The EABI says that an inline function may never be the key method. */
23479 arm_cxx_key_method_may_be_inline (void)
23481 return !TARGET_AAPCS_BASED;
23485 arm_cxx_determine_class_data_visibility (tree decl)
23487 if (!TARGET_AAPCS_BASED
23488 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23491 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23492 is exported. However, on systems without dynamic vague linkage,
23493 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23494 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23495 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23497 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23498 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23502 arm_cxx_class_data_always_comdat (void)
23504 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23505 vague linkage if the class has no key function. */
23506 return !TARGET_AAPCS_BASED;
23510 /* The EABI says __aeabi_atexit should be used to register static destructors. */
23514 arm_cxx_use_aeabi_atexit (void)
23516 return TARGET_AAPCS_BASED;
23521 arm_set_return_address (rtx source, rtx scratch)
23523 arm_stack_offsets *offsets;
23524 HOST_WIDE_INT delta;
23526 unsigned long saved_regs;
23528 offsets = arm_get_frame_offsets ();
23529 saved_regs = offsets->saved_regs_mask;
23531 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23532 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23535 if (frame_pointer_needed)
23536 addr = plus_constant(hard_frame_pointer_rtx, -4);
23539 /* LR will be the first saved register. */
23540 delta = offsets->outgoing_args - (offsets->frame + 4);
23545 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23546 GEN_INT (delta & ~4095)));
23551 addr = stack_pointer_rtx;
23553 addr = plus_constant (addr, delta);
23555 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23561 thumb_set_return_address (rtx source, rtx scratch)
23563 arm_stack_offsets *offsets;
23564 HOST_WIDE_INT delta;
23565 HOST_WIDE_INT limit;
23568 unsigned long mask;
23572 offsets = arm_get_frame_offsets ();
23573 mask = offsets->saved_regs_mask;
23574 if (mask & (1 << LR_REGNUM))
23577 /* Find the saved regs. */
23578 if (frame_pointer_needed)
23580 delta = offsets->soft_frame - offsets->saved_args;
23581 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23587 delta = offsets->outgoing_args - offsets->saved_args;
23590 /* Allow for the stack frame. */
23591 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23593 /* The link register is always the first saved register. */
23596 /* Construct the address. */
23597 addr = gen_rtx_REG (SImode, reg);
23600 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23601 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23605 addr = plus_constant (addr, delta);
23607 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23610 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23613 /* Implements target hook vector_mode_supported_p. */
23615 arm_vector_mode_supported_p (enum machine_mode mode)
23617 /* Neon also supports V2SImode, etc. listed in the clause below. */
23618 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23619 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23622 if ((TARGET_NEON || TARGET_IWMMXT)
23623 && ((mode == V2SImode)
23624 || (mode == V4HImode)
23625 || (mode == V8QImode)))
23628 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23629 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23630 || mode == V2HAmode))
23636 /* Implements target hook array_mode_supported_p. */
23639 arm_array_mode_supported_p (enum machine_mode mode,
23640 unsigned HOST_WIDE_INT nelems)
23643 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23644 && (nelems >= 2 && nelems <= 4))
23650 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23651 registers when autovectorizing for Neon, at least until multiple vector
23652 widths are supported properly by the middle-end. */
23654 static enum machine_mode
23655 arm_preferred_simd_mode (enum machine_mode mode)
23661 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23663 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23665 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23667 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23669 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23676 if (TARGET_REALLY_IWMMXT)
23692 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23694 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23695 using r0-r4 for function arguments, r7 for the stack frame and don't have
23696 enough left over to do doubleword arithmetic. For Thumb-2 all the
23697 potentially problematic instructions accept high registers so this is not
23698 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23699 that require many low registers. */
23701 arm_class_likely_spilled_p (reg_class_t rclass)
23703 if ((TARGET_THUMB1 && rclass == LO_REGS)
23704 || rclass == CC_REG)
23710 /* Implements target hook small_register_classes_for_mode_p. */
23712 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23714 return TARGET_THUMB1;
23717 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23718 ARM insns and therefore guarantee that the shift count is modulo 256.
23719 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23720 guarantee no particular behavior for out-of-range counts. */
23722 static unsigned HOST_WIDE_INT
23723 arm_shift_truncation_mask (enum machine_mode mode)
23725 return mode == SImode ? 255 : 0;
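/* Example: with a mask of 255 for SImode, a shift such as
   (x << (n & 255)) may be folded to (x << n), because the core
   shifts honour only the bottom byte of the count.  The 0 returned
   for DImode promises nothing about out-of-range counts.  */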
23729 /* Map internal gcc register numbers to DWARF2 register numbers. */
23732 arm_dbx_register_number (unsigned int regno)
23737 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23738 compatibility. The EABI defines them as registers 96-103. */
23739 if (IS_FPA_REGNUM (regno))
23740 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23742 if (IS_VFP_REGNUM (regno))
23744 /* See comment in arm_dwarf_register_span. */
23745 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23746 return 64 + regno - FIRST_VFP_REGNUM;
23748 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23751 if (IS_IWMMXT_GR_REGNUM (regno))
23752 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23754 if (IS_IWMMXT_REGNUM (regno))
23755 return 112 + regno - FIRST_IWMMXT_REGNUM;
23757 gcc_unreachable ();
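/* Example mappings under the scheme above: s5 -> 64 + 5 = 69,
   d16 -> 256 + 16 = 272, and on AAPCS targets the first FPA
   register maps to 96.  */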
23760 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23761 GCC models them as 64 32-bit registers, so we need to describe this to
23762 the DWARF generation code. Other registers can use the default. */
23764 arm_dwarf_register_span (rtx rtl)
23771 regno = REGNO (rtl);
23772 if (!IS_VFP_REGNUM (regno))
23775 /* XXX FIXME: The EABI defines two VFP register ranges:
23776 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23777 256-287: D0-D31
23778 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23779 corresponding D register. Until GDB supports this, we shall use the
23780 legacy encodings. We also use these encodings for D0-D15 for
23781 compatibility with older debuggers. */
23782 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23785 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23786 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23787 regno = (regno - FIRST_VFP_REGNUM) / 2;
23788 for (i = 0; i < nregs; i++)
23789 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23794 #if ARM_UNWIND_INFO
23795 /* Emit unwind directives for a store-multiple instruction or stack pointer
23796 push during alignment.
23797 These should only ever be generated by the function prologue code, so
23798 expect them to have a particular form. */
23801 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23804 HOST_WIDE_INT offset;
23805 HOST_WIDE_INT nregs;
23811 e = XVECEXP (p, 0, 0);
23812 if (GET_CODE (e) != SET)
23815 /* First insn will adjust the stack pointer. */
23816 if (GET_CODE (e) != SET
23817 || GET_CODE (XEXP (e, 0)) != REG
23818 || REGNO (XEXP (e, 0)) != SP_REGNUM
23819 || GET_CODE (XEXP (e, 1)) != PLUS)
23822 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23823 nregs = XVECLEN (p, 0) - 1;
23825 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23828 /* The function prologue may also push pc, but not annotate it as it is
23829 never restored. We turn this into a stack pointer adjustment. */
23830 if (nregs * 4 == offset - 4)
23832 fprintf (asm_out_file, "\t.pad #4\n");
23836 fprintf (asm_out_file, "\t.save {");
23838 else if (IS_VFP_REGNUM (reg))
23841 fprintf (asm_out_file, "\t.vsave {");
23843 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23845 /* FPA registers are done differently. */
23846 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23850 /* Unknown register type. */
23853 /* If the stack increment doesn't match the size of the saved registers,
23854 something has gone horribly wrong. */
23855 if (offset != nregs * reg_size)
23860 /* The remaining insns will describe the stores. */
23861 for (i = 1; i <= nregs; i++)
23863 /* Expect (set (mem <addr>) (reg)).
23864 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23865 e = XVECEXP (p, 0, i);
23866 if (GET_CODE (e) != SET
23867 || GET_CODE (XEXP (e, 0)) != MEM
23868 || GET_CODE (XEXP (e, 1)) != REG)
23871 reg = REGNO (XEXP (e, 1));
23876 fprintf (asm_out_file, ", ");
23877 /* We can't use %r for vfp because we need to use the
23878 double precision register names. */
23879 if (IS_VFP_REGNUM (reg))
23880 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23882 asm_fprintf (asm_out_file, "%r", reg);
23884 #ifdef ENABLE_CHECKING
23885 /* Check that the addresses are consecutive. */
23886 e = XEXP (XEXP (e, 0), 0);
23887 if (GET_CODE (e) == PLUS)
23889 offset += reg_size;
23890 if (GET_CODE (XEXP (e, 0)) != REG
23891 || REGNO (XEXP (e, 0)) != SP_REGNUM
23892 || GET_CODE (XEXP (e, 1)) != CONST_INT
23893 || offset != INTVAL (XEXP (e, 1)))
23897 || GET_CODE (e) != REG
23898 || REGNO (e) != SP_REGNUM)
23902 fprintf (asm_out_file, "}\n");
23905 /* Emit unwind directives for a SET. */
23908 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23916 switch (GET_CODE (e0))
23919 /* Pushing a single register. */
23920 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23921 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23922 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23925 asm_fprintf (asm_out_file, "\t.save ");
23926 if (IS_VFP_REGNUM (REGNO (e1)))
23927 asm_fprintf(asm_out_file, "{d%d}\n",
23928 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23930 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23934 if (REGNO (e0) == SP_REGNUM)
23936 /* A stack increment. */
23937 if (GET_CODE (e1) != PLUS
23938 || GET_CODE (XEXP (e1, 0)) != REG
23939 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23940 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23943 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23944 -INTVAL (XEXP (e1, 1)));
23946 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23948 HOST_WIDE_INT offset;
23950 if (GET_CODE (e1) == PLUS)
23952 if (GET_CODE (XEXP (e1, 0)) != REG
23953 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23955 reg = REGNO (XEXP (e1, 0));
23956 offset = INTVAL (XEXP (e1, 1));
23957 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23958 HARD_FRAME_POINTER_REGNUM, reg,
23961 else if (GET_CODE (e1) == REG)
23964 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23965 HARD_FRAME_POINTER_REGNUM, reg);
23970 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23972 /* Move from sp to reg. */
23973 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23975 else if (GET_CODE (e1) == PLUS
23976 && GET_CODE (XEXP (e1, 0)) == REG
23977 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23978 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23980 /* Set reg to offset from sp. */
23981 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23982 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
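/* Illustrative only: typical directives produced by the cases above
   (register names and offsets are hypothetical) are

	.pad #16		@ sp = sp - 16
	.setfp fp, sp, #8	@ fp = sp + 8
	.movsp r4		@ vsp is now r4
*/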
23994 /* Emit unwind directives for the given insn. */
23997 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24000 bool handled_one = false;
24002 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24005 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24006 && (TREE_NOTHROW (current_function_decl)
24007 || crtl->all_throwers_are_sibcalls))
24010 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24013 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24015 pat = XEXP (note, 0);
24016 switch (REG_NOTE_KIND (note))
24018 case REG_FRAME_RELATED_EXPR:
24021 case REG_CFA_REGISTER:
24024 pat = PATTERN (insn);
24025 if (GET_CODE (pat) == PARALLEL)
24026 pat = XVECEXP (pat, 0, 0);
24029 /* Only emitted for IS_STACKALIGN re-alignment. */
24034 src = SET_SRC (pat);
24035 dest = SET_DEST (pat);
24037 gcc_assert (src == stack_pointer_rtx);
24038 reg = REGNO (dest);
24039 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24042 handled_one = true;
24045 case REG_CFA_DEF_CFA:
24046 case REG_CFA_EXPRESSION:
24047 case REG_CFA_ADJUST_CFA:
24048 case REG_CFA_OFFSET:
24049 /* ??? Only handling here what we actually emit. */
24050 gcc_unreachable ();
24058 pat = PATTERN (insn);
24061 switch (GET_CODE (pat))
24064 arm_unwind_emit_set (asm_out_file, pat);
24068 /* Store multiple. */
24069 arm_unwind_emit_sequence (asm_out_file, pat);
24078 /* Output a reference from a function exception table to the type_info
24079 object X. The EABI specifies that the symbol should be relocated by
24080 an R_ARM_TARGET2 relocation. */
24083 arm_output_ttype (rtx x)
24085 fputs ("\t.word\t", asm_out_file);
24086 output_addr_const (asm_out_file, x);
24087 /* Use special relocations for symbol references. */
24088 if (GET_CODE (x) != CONST_INT)
24089 fputs ("(TARGET2)", asm_out_file);
24090 fputc ('\n', asm_out_file);
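/* Illustrative only: for a handler catching "int" this would emit

	.word	_ZTIi(TARGET2)

   leaving the linker to apply the R_ARM_TARGET2 relocation.  */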
24095 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24098 arm_asm_emit_except_personality (rtx personality)
24100 fputs ("\t.personality\t", asm_out_file);
24101 output_addr_const (asm_out_file, personality);
24102 fputc ('\n', asm_out_file);
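/* Illustrative only: for C++ this typically emits

	.personality	__gxx_personality_v0
*/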
24105 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24108 arm_asm_init_sections (void)
24110 exception_section = get_unnamed_section (0, output_section_asm_op,
24113 #endif /* ARM_UNWIND_INFO */
24115 /* Output unwind directives for the start/end of a function. */
24118 arm_output_fn_unwind (FILE * f, bool prologue)
24120 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24124 fputs ("\t.fnstart\n", f);
24127 /* If this function will never be unwound, then mark it as such.
24128 The same condition is used in arm_unwind_emit to suppress
24129 the frame annotations. */
24130 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24131 && (TREE_NOTHROW (current_function_decl)
24132 || crtl->all_throwers_are_sibcalls))
24133 fputs("\t.cantunwind\n", f);
24135 fputs ("\t.fnend\n", f);
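/* Illustrative only: a nothrow function ends up bracketed as

	.fnstart
	...frame annotations and function body...
	.cantunwind
	.fnend
*/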
24140 arm_emit_tls_decoration (FILE *fp, rtx x)
24142 enum tls_reloc reloc;
24145 val = XVECEXP (x, 0, 0);
24146 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24148 output_addr_const (fp, val);
24153 fputs ("(tlsgd)", fp);
24156 fputs ("(tlsldm)", fp);
24159 fputs ("(tlsldo)", fp);
24162 fputs ("(gottpoff)", fp);
24165 fputs ("(tpoff)", fp);
24168 fputs ("(tlsdesc)", fp);
24171 gcc_unreachable ();
24180 fputs (" + (. - ", fp);
24181 output_addr_const (fp, XVECEXP (x, 0, 2));
24182 /* For TLS_DESCSEQ the 3rd operand encodes thumbness, and is added
rather than subtracted. */
24183 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24184 output_addr_const (fp, XVECEXP (x, 0, 3));
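/* Illustrative only (the elided cases above decide when the PIC term is
   appended): a general-dynamic TLS reference prints as something like

	.word	x(tlsgd) + (. - .LPIC0 + 8)
*/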
24194 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24197 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24199 gcc_assert (size == 4);
24200 fputs ("\t.word\t", file);
24201 output_addr_const (file, x);
24202 fputs ("(tlsldo)", file);
24205 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24208 arm_output_addr_const_extra (FILE *fp, rtx x)
24210 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24211 return arm_emit_tls_decoration (fp, x);
24212 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24215 int labelno = INTVAL (XVECEXP (x, 0, 0));
24217 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24218 assemble_name_raw (fp, label);
24222 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24224 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24228 output_addr_const (fp, XVECEXP (x, 0, 0));
24232 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24234 output_addr_const (fp, XVECEXP (x, 0, 0));
24238 output_addr_const (fp, XVECEXP (x, 0, 1));
24242 else if (GET_CODE (x) == CONST_VECTOR)
24243 return arm_emit_vector_const (fp, x);
24248 /* Output assembly for a shift instruction.
24249 SET_FLAGS determines how the instruction modifies the condition codes.
24250 0 - Do not set condition codes.
24251 1 - Set condition codes.
24252 2 - Use smallest instruction. */
24254 arm_output_shift(rtx * operands, int set_flags)
24257 static const char flag_chars[3] = {'?', '.', '!'};
24262 c = flag_chars[set_flags];
24263 if (TARGET_UNIFIED_ASM)
24265 shift = shift_op(operands[3], &val);
24269 operands[2] = GEN_INT(val);
24270 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24273 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24276 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24277 output_asm_insn (pattern, operands);
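/* Illustrative only: in unified syntax with an LSL shift and a constant
   amount, the template built above is "lsl%?\t%0, %1, %2" (with the '.'
   flag character instead when SET_FLAGS is 1); the divided-syntax
   fallback "mov%?\t%0, %1%S3" prints, e.g., "mov r0, r1, lsl #3".  */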
24281 /* Output a Thumb-1 casesi dispatch sequence. */
24283 thumb1_output_casesi (rtx *operands)
24285 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24287 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24289 switch (GET_MODE(diff_vec))
24292 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24293 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24295 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24296 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24298 return "bl\t%___gnu_thumb1_case_si";
24300 gcc_unreachable ();
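/* Illustrative only: with the default user_label_prefix this emits,
   e.g., "bl __gnu_thumb1_case_sqi", with the difference table placed
   immediately after the call.  */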
24304 /* Output a Thumb-2 casesi instruction. */
24306 thumb2_output_casesi (rtx *operands)
24308 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24310 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24312 output_asm_insn ("cmp\t%0, %1", operands);
24313 output_asm_insn ("bhi\t%l3", operands);
24314 switch (GET_MODE(diff_vec))
24317 return "tbb\t[%|pc, %0]";
24319 return "tbh\t[%|pc, %0, lsl #1]";
24323 output_asm_insn ("adr\t%4, %l2", operands);
24324 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24325 output_asm_insn ("add\t%4, %4, %5", operands);
24330 output_asm_insn ("adr\t%4, %l2", operands);
24331 return "ldr\t%|pc, [%4, %0, lsl #2]";
24334 gcc_unreachable ();
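/* Illustrative only: for a QImode difference vector the sequence is

	cmp	r0, #<bound>
	bhi	.Ldefault
	tbb	[pc, r0]

   with the table of byte offsets following the tbb.  */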
24338 /* Most ARM cores are single issue, but some newer ones can dual issue.
24339 The scheduler descriptions rely on this being correct. */
24341 arm_issue_rate (void)
24363 /* A table and a function to perform ARM-specific name mangling for
24364 NEON vector types in order to conform to the AAPCS (see "Procedure
24365 Call Standard for the ARM Architecture", Appendix A). To qualify
24366 for emission with the mangled names defined in that document, a
24367 vector type must not only be of the correct mode but also be
24368 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24371 enum machine_mode mode;
24372 const char *element_type_name;
24373 const char *aapcs_name;
24374 } arm_mangle_map_entry;
24376 static arm_mangle_map_entry arm_mangle_map[] = {
24377 /* 64-bit containerized types. */
24378 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24379 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24380 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24381 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24382 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24383 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24384 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24385 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24386 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24387 /* 128-bit containerized types. */
24388 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24389 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24390 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24391 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24392 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24393 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24394 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24395 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24396 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24397 { VOIDmode, NULL, NULL }
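/* Illustrative only: with the table above, the C++ declaration
   "void f (int8x8_t);" mangles to "_Z1f15__simd64_int8_t".  */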
24401 arm_mangle_type (const_tree type)
24403 arm_mangle_map_entry *pos = arm_mangle_map;
24405 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24406 has to be mangled as if it were in the "std" namespace. */
24407 if (TARGET_AAPCS_BASED
24408 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24410 static bool warned;
24411 if (!warned && warn_psabi && !in_system_header)
24414 inform (input_location,
24415 "the mangling of %<va_list%> has changed in GCC 4.4");
24417 return "St9__va_list";
24420 /* Half-precision float. */
24421 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24424 if (TREE_CODE (type) != VECTOR_TYPE)
24427 /* Check the mode of the vector type, and the name of the vector
24428 element type, against the table. */
24429 while (pos->mode != VOIDmode)
24431 tree elt_type = TREE_TYPE (type);
24433 if (pos->mode == TYPE_MODE (type)
24434 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24435 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24436 pos->element_type_name))
24437 return pos->aapcs_name;
24442 /* Use the default mangling for unrecognized (possibly user-defined) vector types. */
24447 /* Order of allocation of core registers for Thumb: this allocation is
24448 written over the corresponding initial entries of the array
24449 initialized with REG_ALLOC_ORDER. We allocate all low registers
24450 first. Saving and restoring a low register is usually cheaper than
24451 using a call-clobbered high register. */
24453 static const int thumb_core_reg_alloc_order[] =
24455 3, 2, 1, 0, 4, 5, 6, 7,
24456 14, 12, 8, 9, 10, 11, 13, 15
24459 /* Adjust register allocation order when compiling for Thumb. */
24462 arm_order_regs_for_local_alloc (void)
24464 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24465 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24467 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24468 sizeof (thumb_core_reg_alloc_order));
24471 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24474 arm_frame_pointer_required (void)
24476 return (cfun->has_nonlocal_label
24477 || SUBTARGET_FRAME_POINTER_REQUIRED
24478 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24481 /* Only Thumb-1 lacks conditional execution, so return true if
24482 the target is not Thumb-1. */
24484 arm_have_conditional_execution (void)
24486 return !TARGET_THUMB1;
24489 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
24490 static HOST_WIDE_INT
24491 arm_vector_alignment (const_tree type)
24493 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
24495 if (TARGET_AAPCS_BASED)
24496 align = MIN (align, 64);
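/* Illustrative only: a 128-bit NEON vector therefore gets 64-bit
   alignment under AAPCS, not its 128-bit natural alignment.  */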
24501 static unsigned int
24502 arm_autovectorize_vector_sizes (void)
24504 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24508 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24510 /* Vectors which aren't in packed structures will not be less aligned than
24511 the natural alignment of their element type, so this is safe. */
24512 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24515 return default_builtin_vector_alignment_reachable (type, is_packed);
24519 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24520 const_tree type, int misalignment,
24523 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24525 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24530 /* If the misalignment is unknown, we should be able to handle the access
24531 so long as it is not to a member of a packed data structure. */
24532 if (misalignment == -1)
24535 /* Return true if the misalignment is a multiple of the natural alignment
24536 of the vector's element type. This is probably always going to be
24537 true in practice, since we've already established that this isn't a packed access. */
24539 return ((misalignment % align) == 0);
24542 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24547 arm_conditional_register_usage (void)
24551 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24553 for (regno = FIRST_FPA_REGNUM;
24554 regno <= LAST_FPA_REGNUM; ++regno)
24555 fixed_regs[regno] = call_used_regs[regno] = 1;
24558 if (TARGET_THUMB1 && optimize_size)
24560 /* When optimizing for size on Thumb-1, it's better not
24561 to use the HI regs, because of the overhead of stacking them. */
24563 for (regno = FIRST_HI_REGNUM;
24564 regno <= LAST_HI_REGNUM; ++regno)
24565 fixed_regs[regno] = call_used_regs[regno] = 1;
24568 /* The link register can be clobbered by any branch insn,
24569 but we have no way to track that at present, so mark
24570 it as unavailable. */
24572 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24574 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24576 if (TARGET_MAVERICK)
24578 for (regno = FIRST_FPA_REGNUM;
24579 regno <= LAST_FPA_REGNUM; ++ regno)
24580 fixed_regs[regno] = call_used_regs[regno] = 1;
24581 for (regno = FIRST_CIRRUS_FP_REGNUM;
24582 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24584 fixed_regs[regno] = 0;
24585 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24590 /* VFPv3 registers are disabled when earlier VFP
24591 versions are selected due to the definition of
24592 LAST_VFP_REGNUM. */
24593 for (regno = FIRST_VFP_REGNUM;
24594 regno <= LAST_VFP_REGNUM; ++ regno)
24596 fixed_regs[regno] = 0;
24597 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24598 || regno >= FIRST_VFP_REGNUM + 32;
24603 if (TARGET_REALLY_IWMMXT)
24605 regno = FIRST_IWMMXT_GR_REGNUM;
24606 /* The 2002/10/09 revision of the XScale ABI has wCG0
24607 and wCG1 as call-preserved registers. The 2002/11/21
24608 revision changed this so that all wCG registers are
24609 scratch registers. */
24610 for (regno = FIRST_IWMMXT_GR_REGNUM;
24611 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24612 fixed_regs[regno] = 0;
24613 /* The XScale ABI has wR0 - wR9 as scratch registers,
24614 the rest as call-preserved registers. */
24615 for (regno = FIRST_IWMMXT_REGNUM;
24616 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24618 fixed_regs[regno] = 0;
24619 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24623 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24625 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24626 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24628 else if (TARGET_APCS_STACK)
24630 fixed_regs[10] = 1;
24631 call_used_regs[10] = 1;
24633 /* -mcaller-super-interworking reserves r11 for calls to
24634 _interwork_r11_call_via_rN(). Making the register global
24635 is an easy way of ensuring that it remains valid for all calls. */
24637 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24638 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24640 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24641 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24642 if (TARGET_CALLER_INTERWORKING)
24643 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24645 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24649 arm_preferred_rename_class (reg_class_t rclass)
24651 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24652 using GENERAL_REGS. During the register rename pass we therefore prefer
24653 LO_REGS, which can reduce code size. */
24654 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24660 /* Compute the attribute "length" of insn "*push_multi".
24661 So this function MUST be kept in sync with that insn pattern. */
24663 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
24665 int i, regno, hi_reg;
24666 int num_saves = XVECLEN (parallel_op, 0);
24676 regno = REGNO (first_op);
24677 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24678 for (i = 1; i < num_saves && !hi_reg; i++)
24680 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24681 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24689 /* Compute the number of instructions emitted by output_move_double. */
24691 arm_count_output_move_double_insns (rtx *operands)
24695 /* output_move_double may modify the operands array, so call it
24696 here on a copy of the array. */
24697 ops[0] = operands[0];
24698 ops[1] = operands[1];
24699 output_move_double (ops, false, &count);
24704 vfp3_const_double_for_fract_bits (rtx operand)
24706 REAL_VALUE_TYPE r0;
24708 if (GET_CODE (operand) != CONST_DOUBLE)
24711 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
24712 if (exact_real_inverse (DFmode, &r0))
24714 if (exact_real_truncate (DFmode, &r0))
24716 HOST_WIDE_INT value = real_to_integer (&r0);
24717 value = value & 0xffffffff;
24718 if ((value != 0) && ( (value & (value - 1)) == 0))
24719 return int_log2 (value);
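/* Illustrative only: for the constant 0.0625 (1/16) the exact inverse
   is 16.0, an exact power of two, so this returns int_log2 (16) == 4.  */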
24725 /* Emit a memory barrier around an atomic sequence according to MODEL. */
24728 arm_pre_atomic_barrier (enum memmodel model)
24732 case MEMMODEL_RELAXED:
24733 case MEMMODEL_CONSUME:
24734 case MEMMODEL_ACQUIRE:
24736 case MEMMODEL_RELEASE:
24737 case MEMMODEL_ACQ_REL:
24738 case MEMMODEL_SEQ_CST:
24739 emit_insn (gen_memory_barrier ());
24742 gcc_unreachable ();
24747 arm_post_atomic_barrier (enum memmodel model)
24751 case MEMMODEL_RELAXED:
24752 case MEMMODEL_CONSUME:
24753 case MEMMODEL_RELEASE:
24755 case MEMMODEL_ACQUIRE:
24756 case MEMMODEL_ACQ_REL:
24757 case MEMMODEL_SEQ_CST:
24758 emit_insn (gen_memory_barrier ());
24761 gcc_unreachable ();
24765 /* Emit the load-exclusive and store-exclusive instructions. */
24768 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
24770 rtx (*gen) (rtx, rtx);
24774 case QImode: gen = gen_arm_load_exclusiveqi; break;
24775 case HImode: gen = gen_arm_load_exclusivehi; break;
24776 case SImode: gen = gen_arm_load_exclusivesi; break;
24777 case DImode: gen = gen_arm_load_exclusivedi; break;
24779 gcc_unreachable ();
24782 emit_insn (gen (rval, mem));
24786 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
24788 rtx (*gen) (rtx, rtx, rtx);
24792 case QImode: gen = gen_arm_store_exclusiveqi; break;
24793 case HImode: gen = gen_arm_store_exclusivehi; break;
24794 case SImode: gen = gen_arm_store_exclusivesi; break;
24795 case DImode: gen = gen_arm_store_exclusivedi; break;
24797 gcc_unreachable ();
24800 emit_insn (gen (bval, rval, mem));
24803 /* Mark the previous jump instruction as unlikely. */
24806 emit_unlikely_jump (rtx insn)
24808 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
24810 insn = emit_jump_insn (insn);
24811 add_reg_note (insn, REG_BR_PROB, very_unlikely);
24814 /* Expand a compare and swap pattern. */
24817 arm_expand_compare_and_swap (rtx operands[])
24819 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
24820 enum machine_mode mode;
24821 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
24823 bval = operands[0];
24824 rval = operands[1];
24826 oldval = operands[3];
24827 newval = operands[4];
24828 is_weak = operands[5];
24829 mod_s = operands[6];
24830 mod_f = operands[7];
24831 mode = GET_MODE (mem);
24837 /* For narrow modes, we're going to perform the comparison in SImode,
24838 so do the zero-extension now. */
24839 rval = gen_reg_rtx (SImode);
24840 oldval = convert_modes (SImode, mode, oldval, true);
24844 /* Force the value into a register if needed. We waited until after
24845 the zero-extension above to do this properly. */
24846 if (!arm_add_operand (oldval, mode))
24847 oldval = force_reg (mode, oldval);
24851 if (!cmpdi_operand (oldval, mode))
24852 oldval = force_reg (mode, oldval);
24856 gcc_unreachable ();
24861 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
24862 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
24863 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
24864 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
24866 gcc_unreachable ();
24869 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
24871 if (mode == QImode || mode == HImode)
24872 emit_move_insn (operands[1], gen_lowpart (mode, rval));
24874 /* In all cases, we arrange for success to be signaled by Z set.
24875 This arrangement allows for the boolean result to be used directly
24876 in a subsequent branch, post optimization. */
24877 x = gen_rtx_REG (CCmode, CC_REGNUM);
24878 x = gen_rtx_EQ (SImode, x, const0_rtx);
24879 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
24882 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
24883 another memory store between the load-exclusive and store-exclusive can
24884 reset the monitor from Exclusive to Open state. This means we must wait
24885 until after reload to split the pattern, lest we get a register spill in
24886 the middle of the atomic sequence. */
24889 arm_split_compare_and_swap (rtx operands[])
24891 rtx rval, mem, oldval, newval, scratch;
24892 enum machine_mode mode;
24893 enum memmodel mod_s, mod_f;
24895 rtx label1, label2, x, cond;
24897 rval = operands[0];
24899 oldval = operands[2];
24900 newval = operands[3];
24901 is_weak = (operands[4] != const0_rtx);
24902 mod_s = (enum memmodel) INTVAL (operands[5]);
24903 mod_f = (enum memmodel) INTVAL (operands[6]);
24904 scratch = operands[7];
24905 mode = GET_MODE (mem);
24907 arm_pre_atomic_barrier (mod_s);
24912 label1 = gen_label_rtx ();
24913 emit_label (label1);
24915 label2 = gen_label_rtx ();
24917 arm_emit_load_exclusive (mode, rval, mem);
24919 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
24920 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24921 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24922 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
24923 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24925 arm_emit_store_exclusive (mode, scratch, mem, newval);
24927 /* Weak or strong, we want EQ to be true for success, so that we
24928 match the flags that we got from the compare above. */
24929 cond = gen_rtx_REG (CCmode, CC_REGNUM);
24930 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
24931 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
24935 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24936 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24937 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
24938 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24941 if (mod_f != MEMMODEL_RELAXED)
24942 emit_label (label2);
24944 arm_post_atomic_barrier (mod_s);
24946 if (mod_f == MEMMODEL_RELAXED)
24947 emit_label (label2);
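/* Illustrative only: for SImode and MEMMODEL_SEQ_CST the strong variant
   splits into roughly (registers hypothetical)

	dmb	sy
   1:	ldrex	r0, [r1]
	cmp	r0, r2
	bne	2f
	strex	r3, r4, [r1]
	cmp	r3, #0
	bne	1b
   2:	dmb	sy

   while the weak variant omits the backward branch.  */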
24951 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
24952 rtx value, rtx model_rtx, rtx cond)
24954 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
24955 enum machine_mode mode = GET_MODE (mem);
24956 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
24959 arm_pre_atomic_barrier (model);
24961 label = gen_label_rtx ();
24962 emit_label (label);
24965 new_out = gen_lowpart (wmode, new_out);
24967 old_out = gen_lowpart (wmode, old_out);
24970 value = simplify_gen_subreg (wmode, value, mode, 0);
24972 arm_emit_load_exclusive (mode, old_out, mem);
24981 x = gen_rtx_AND (wmode, old_out, value);
24982 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24983 x = gen_rtx_NOT (wmode, new_out);
24984 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24988 if (CONST_INT_P (value))
24990 value = GEN_INT (-INTVAL (value));
24996 if (mode == DImode)
24998 /* DImode plus/minus need to clobber flags. */
24999 /* The adddi3 and subdi3 patterns are incorrectly written so that
25000 they require matching operands, even when we could easily support
25001 three operands. Thankfully, this can be fixed up post-splitting,
25002 as the individual add+adc patterns do accept three operands and
25003 post-reload cprop can make these moves go away. */
25004 emit_move_insn (new_out, old_out);
25006 x = gen_adddi3 (new_out, new_out, value);
25008 x = gen_subdi3 (new_out, new_out, value);
25015 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25016 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25020 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25022 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25023 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25025 arm_post_atomic_barrier (model);
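/* Illustrative only: an SImode PLUS under MEMMODEL_SEQ_CST becomes
   roughly (registers hypothetical)

	dmb	sy
   1:	ldrex	r0, [r2]
	add	r1, r0, r3
	strex	ip, r1, [r2]
	cmp	ip, #0
	bne	1b
	dmb	sy
*/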
25028 #define MAX_VECT_LEN 16
25030 struct expand_vec_perm_d
25032 rtx target, op0, op1;
25033 unsigned char perm[MAX_VECT_LEN];
25034 enum machine_mode vmode;
25035 unsigned char nelt;
25040 /* Generate a variable permutation. */
25043 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25045 enum machine_mode vmode = GET_MODE (target);
25046 bool one_vector_p = rtx_equal_p (op0, op1);
25048 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25049 gcc_checking_assert (GET_MODE (op0) == vmode);
25050 gcc_checking_assert (GET_MODE (op1) == vmode);
25051 gcc_checking_assert (GET_MODE (sel) == vmode);
25052 gcc_checking_assert (TARGET_NEON);
25056 if (vmode == V8QImode)
25057 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25059 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25065 if (vmode == V8QImode)
25067 pair = gen_reg_rtx (V16QImode);
25068 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25069 pair = gen_lowpart (TImode, pair);
25070 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25074 pair = gen_reg_rtx (OImode);
25075 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25076 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25082 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25084 enum machine_mode vmode = GET_MODE (target);
25085 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25086 bool one_vector_p = rtx_equal_p (op0, op1);
25087 rtx rmask[MAX_VECT_LEN], mask;
25089 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25090 numbering of elements for big-endian, we must reverse the order. */
25091 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25093 /* The VTBL instruction does not use a modulo index, so we must take care
25094 of that ourselves. */
25095 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25096 for (i = 0; i < nelt; ++i)
25098 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25099 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25101 arm_expand_vec_perm_1 (target, op0, op1, sel);
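/* Illustrative only: for a single V8QI table the selector is ANDed with
   a vector of 7s, reducing each index modulo the element count before
   the vtbl expansion above.  */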
25104 /* Generate or test for an insn that supports a constant permutation. */
25106 /* Recognize patterns for the VUZP insns. */
25109 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25111 unsigned int i, odd, mask, nelt = d->nelt;
25112 rtx out0, out1, in0, in1, x;
25113 rtx (*gen)(rtx, rtx, rtx, rtx);
25115 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25118 /* Note that these are little-endian tests. Adjust for big-endian later. */
25119 if (d->perm[0] == 0)
25121 else if (d->perm[0] == 1)
25125 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25127 for (i = 0; i < nelt; i++)
25129 unsigned elt = (i * 2 + odd) & mask;
25130 if (d->perm[i] != elt)
25140 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25141 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25142 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25143 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25144 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25145 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25146 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25147 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25149 gcc_unreachable ();
25154 if (BYTES_BIG_ENDIAN)
25156 x = in0, in0 = in1, in1 = x;
25161 out1 = gen_reg_rtx (d->vmode);
25163 x = out0, out0 = out1, out1 = x;
25165 emit_insn (gen (out0, in0, in1, out1));
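/* Illustrative only: for two V8QI operands the even-element selector
   {0, 2, 4, 6, 8, 10, 12, 14} matches here and becomes a single
   vuzp.8 of the two inputs.  */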
25169 /* Recognize patterns for the VZIP insns. */
25172 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25174 unsigned int i, high, mask, nelt = d->nelt;
25175 rtx out0, out1, in0, in1, x;
25176 rtx (*gen)(rtx, rtx, rtx, rtx);
25178 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25181 /* Note that these are little-endian tests. Adjust for big-endian later. */
25183 if (d->perm[0] == high)
25185 else if (d->perm[0] == 0)
25189 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25191 for (i = 0; i < nelt / 2; i++)
25193 unsigned elt = (i + high) & mask;
25194 if (d->perm[i * 2] != elt)
25196 elt = (elt + nelt) & mask;
25197 if (d->perm[i * 2 + 1] != elt)
25207 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25208 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25209 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25210 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25211 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25212 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25213 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25214 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25216 gcc_unreachable ();
25221 if (BYTES_BIG_ENDIAN)
25223 x = in0, in0 = in1, in1 = x;
25228 out1 = gen_reg_rtx (d->vmode);
25230 x = out0, out0 = out1, out1 = x;
25232 emit_insn (gen (out0, in0, in1, out1));
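/* Illustrative only: for two V8QI operands the low-half interleave
   selector {0, 8, 1, 9, 2, 10, 3, 11} matches here (high == 0) and
   becomes a single vzip.8.  */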
25236 /* Recognize patterns for the VREV insns. */
25239 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25241 unsigned int i, j, diff, nelt = d->nelt;
25242 rtx (*gen)(rtx, rtx, rtx);
25244 if (!d->one_vector_p)
25253 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25254 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25262 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25263 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25264 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25265 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25273 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25274 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25275 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25276 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25277 case V4SImode: gen = gen_neon_vrev64v4si; break;
25278 case V2SImode: gen = gen_neon_vrev64v2si; break;
25279 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25280 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25289 for (i = 0; i < nelt ; i += diff + 1)
25290 for (j = 0; j <= diff; j += 1)
25292 /* This is guaranteed to be true as the value of diff
25293 is 7, 3, or 1 and we should have enough elements in the
25294 queue to generate this. Getting a vector mask with a
25295 value of diff other than these values implies that
25296 something is wrong by the time we get here. */
25297 gcc_assert (i + j < nelt);
25298 if (d->perm[i + j] != i + diff - j)
25306 /* ??? The third operand is an artifact of the builtin infrastructure
25307 and is ignored by the actual instruction. */
25308 emit_insn (gen (d->target, d->op0, const0_rtx));
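/* Illustrative only: for V8QI the selector {3, 2, 1, 0, 7, 6, 5, 4}
   (diff == 3) matches here and becomes a vrev32.8.  */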
25312 /* Recognize patterns for the VTRN insns. */
25315 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25317 unsigned int i, odd, mask, nelt = d->nelt;
25318 rtx out0, out1, in0, in1, x;
25319 rtx (*gen)(rtx, rtx, rtx, rtx);
25321 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25324 /* Note that these are little-endian tests. Adjust for big-endian later. */
25325 if (d->perm[0] == 0)
25327 else if (d->perm[0] == 1)
25331 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25333 for (i = 0; i < nelt; i += 2)
25335 if (d->perm[i] != i + odd)
25337 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25347 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25348 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25349 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25350 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25351 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25352 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25353 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25354 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25356 gcc_unreachable ();
25361 if (BYTES_BIG_ENDIAN)
25363 x = in0, in0 = in1, in1 = x;
25368 out1 = gen_reg_rtx (d->vmode);
25370 x = out0, out0 = out1, out1 = x;
25372 emit_insn (gen (out0, in0, in1, out1));
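/* Illustrative only: for two V8QI operands the selector
   {0, 8, 2, 10, 4, 12, 6, 14} matches here (odd == 0) and becomes a
   single vtrn.8.  */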
25376 /* The NEON VTBL instruction is a fully variable permutation that's even
25377 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25378 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25379 can do slightly better by expanding this as a constant where we don't
25380 have to apply a mask. */
25383 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25385 rtx rperm[MAX_VECT_LEN], sel;
25386 enum machine_mode vmode = d->vmode;
25387 unsigned int i, nelt = d->nelt;
25389 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25390 numbering of elements for big-endian, we must reverse the order. */
25391 if (BYTES_BIG_ENDIAN)
25397 /* Generic code will try constant permutation twice: once with the
25398 original mode and again with the elements lowered to QImode,
25399 so wait and don't do the selector expansion ourselves. */
25400 if (vmode != V8QImode && vmode != V16QImode)
25403 for (i = 0; i < nelt; ++i)
25404 rperm[i] = GEN_INT (d->perm[i]);
25405 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25406 sel = force_reg (vmode, sel);
25408 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25413 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25415 /* The pattern matching functions above are written to look for a small
25416 number to begin the sequence (0, 1, N/2). If we begin with an index
25417 from the second operand, we can swap the operands. */
25418 if (d->perm[0] >= d->nelt)
25420 unsigned i, nelt = d->nelt;
25423 for (i = 0; i < nelt; ++i)
25424 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25433 if (arm_evpc_neon_vuzp (d))
25435 if (arm_evpc_neon_vzip (d))
25437 if (arm_evpc_neon_vrev (d))
25439 if (arm_evpc_neon_vtrn (d))
25441 return arm_evpc_neon_vtbl (d);
25446 /* Expand a vec_perm_const pattern. */
25449 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25451 struct expand_vec_perm_d d;
25452 int i, nelt, which;
25458 d.vmode = GET_MODE (target);
25459 gcc_assert (VECTOR_MODE_P (d.vmode));
25460 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25461 d.testing_p = false;
25463 for (i = which = 0; i < nelt; ++i)
25465 rtx e = XVECEXP (sel, 0, i);
25466 int ei = INTVAL (e) & (2 * nelt - 1);
25467 which |= (ei < nelt ? 1 : 2);
25477 d.one_vector_p = false;
25478 if (!rtx_equal_p (op0, op1))
25481 /* The elements of PERM do not suggest that only the first operand
25482 is used, but both operands are identical. Allow easier matching
25483 of the permutation by folding the permutation into the single
25487 for (i = 0; i < nelt; ++i)
25488 d.perm[i] &= nelt - 1;
25490 d.one_vector_p = true;
25495 d.one_vector_p = true;
25499 return arm_expand_vec_perm_const_1 (&d);
25502 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
25505 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25506 const unsigned char *sel)
25508 struct expand_vec_perm_d d;
25509 unsigned int i, nelt, which;
25513 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25514 d.testing_p = true;
25515 memcpy (d.perm, sel, nelt);
25517 /* Categorize the set of elements in the selector. */
25518 for (i = which = 0; i < nelt; ++i)
25520 unsigned char e = d.perm[i];
25521 gcc_assert (e < 2 * nelt);
25522 which |= (e < nelt ? 1 : 2);
25525 /* For all elements from the second vector, fold them to the first. */
25527 for (i = 0; i < nelt; ++i)
25530 /* Check whether the mask can be applied to the vector type. */
25531 d.one_vector_p = (which != 3);
25533 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25534 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25535 if (!d.one_vector_p)
25536 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25539 ret = arm_expand_vec_perm_const_1 (&d);
25546 #include "gt-arm.h"