/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
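
/* Usage illustration (an editor's example, not from the original sources):
   the table above is what accepts declarations such as

       void handler (void) __attribute__ ((interrupt ("IRQ")));
       extern int helper (int) __attribute__ ((long_call));

   "isr"/"interrupt" take an optional argument naming the interrupt kind
   and are checked by arm_handle_isr_attribute; "long_call"/"short_call"
   take no arguments and rely only on the type checks encoded in the
   table.  */
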
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
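
/* (Worked out: an anchor thus spans offsets -4088 through +4095, i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.)  */
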
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                 | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
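
/* As an example of how these compose (editor's note): FL_FOR_ARCH5TE
   expands to (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5
   | FL_ARCH5E | FL_THUMB); each architecture inherits every capability
   bit of its predecessors and adds its own.  */
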
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  3,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                            /* Constant limit.  */
  3,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                            /* Constant limit.  */
  1,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
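
/* Worked example (editor's note): bit_count (0x2C) iterates three times,
   clearing one set bit each pass: 0x2C (binary 101100) -> 0x28 -> 0x20 -> 0,
   so the function returns 3.  */
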
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
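
/* For instance (editor's illustration), the call
     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);
   registers "__gnu_ssaddsa3" as the libcall name for saturating signed
   SAmode addition.  */
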
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
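
/* For instance (editor's illustration): a conversion between SQmode and
   HQmode (both signed fract modes) satisfies the fixed-bit.h rule and is
   named "__gnu_fractsqhq2", while SQmode to SFmode gets no suffix and is
   named "__gnu_fractsqsf".  */
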
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI format.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
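
  /* Editor's illustration of the divmod scheme above: for

         int f (int a, int b) { return a / b + a % b; }

     a BPABI target emits one call to __aeabi_idivmod and reads the
     quotient from r0 and the remainder from r1, while a lone "a / b"
     uses the faster __aeabi_idiv entry point registered here.  */
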
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" },
        { QImode, "qi" },
        { HImode, "hi" },
        { SImode, "si" },
        { DImode, "di" },
        { TImode, "ti" },
        { SFmode, "sf" },
        { DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
        }
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
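
/* Editor's note: the tag picked above is ABI-visible; on AAPCS targets a
   C++ parameter of type va_list is mangled as if it were std::__va_list
   ("St9__va_list"), which is why neither the tag nor the field name can
   change (see arm_mangle_type).  */
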
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
             "will be removed in a future release");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

1700 /* If we are not using the default (ARM mode) section anchor offset
1701 ranges, then set the correct ranges now. */
1704 /* Thumb-1 LDR instructions cannot have negative offsets.
1705 Permissible positive offset ranges are 5-bit (for byte loads),
1706 6-bit (for halfword loads), or 7-bit (for word loads).
1707 Empirical results suggest a 7-bit anchor range gives the best
1708 overall code size. */
1709 targetm.min_anchor_offset = 0;
1710 targetm.max_anchor_offset = 127;
1712 else if (TARGET_THUMB2)
1714 /* The minimum is set such that the total size of the block
1715 for a particular anchor is 248 + 1 + 4095 bytes, which is
1716 divisible by eight, ensuring natural spacing of anchors. */
1717 targetm.min_anchor_offset = -248;
1718 targetm.max_anchor_offset = 4095;
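  /* Worked example: an anchor then covers offsets [-248, 4095], i.e.
     248 + 1 + 4095 = 4344 bytes in total, and 4344 = 8 * 543, so
     consecutive anchor blocks stay naturally aligned.  */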
1721 /* V5 code we generate is completely interworking capable, so we turn off
1722 TARGET_INTERWORK here to avoid many tests later on. */
1724 /* XXX However, we must pass the right pre-processor defines to CPP
1725 or GLD can get confused. This is a hack. */
1726 if (TARGET_INTERWORK)
1727 arm_cpp_interwork = 1;
1730 target_flags &= ~MASK_INTERWORK;
1732 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1733 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1735 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1736 error ("iwmmxt abi requires an iwmmxt capable cpu");
1738 if (!global_options_set.x_arm_fpu_index)
1740 const char *target_fpu_name;
1743 #ifdef FPUTYPE_DEFAULT
1744 target_fpu_name = FPUTYPE_DEFAULT;
1746 if (arm_arch_cirrus)
1747 target_fpu_name = "maverick";
1749 target_fpu_name = "fpe2";
1752 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1757 arm_fpu_desc = &all_fpus[arm_fpu_index];
1759 switch (arm_fpu_desc->model)
1761 case ARM_FP_MODEL_FPA:
1762 if (arm_fpu_desc->rev == 2)
1763 arm_fpu_attr = FPU_FPE2;
1764 else if (arm_fpu_desc->rev == 3)
1765 arm_fpu_attr = FPU_FPE3;
1767 arm_fpu_attr = FPU_FPA;
1770 case ARM_FP_MODEL_MAVERICK:
1771 arm_fpu_attr = FPU_MAVERICK;
1774 case ARM_FP_MODEL_VFP:
1775 arm_fpu_attr = FPU_VFP;
1782 if (TARGET_AAPCS_BASED
1783 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1784 error ("FPA is unsupported in the AAPCS");
1786 if (TARGET_AAPCS_BASED)
1788 if (TARGET_CALLER_INTERWORKING)
1789 error ("AAPCS does not support -mcaller-super-interworking");
1791 if (TARGET_CALLEE_INTERWORKING)
1792 error ("AAPCS does not support -mcallee-super-interworking");
1795 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1796 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1797 will ever exist. GCC makes no attempt to support this combination. */
1798 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1799 sorry ("iWMMXt and hardware floating point");
1801 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1802 if (TARGET_THUMB2 && TARGET_IWMMXT)
1803 sorry ("Thumb-2 iWMMXt");
1805 /* __fp16 support currently assumes the core has ldrh. */
1806 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1807 sorry ("__fp16 and no ldrh");
1809 /* If soft-float is specified then don't use FPU. */
1810 if (TARGET_SOFT_FLOAT)
1811 arm_fpu_attr = FPU_NONE;
1813 if (TARGET_AAPCS_BASED)
1815 if (arm_abi == ARM_ABI_IWMMXT)
1816 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1817 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1818 && TARGET_HARD_FLOAT
1820 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1822 arm_pcs_default = ARM_PCS_AAPCS;
1826 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1827 sorry ("-mfloat-abi=hard and VFP");
1829 if (arm_abi == ARM_ABI_APCS)
1830 arm_pcs_default = ARM_PCS_APCS;
1832 arm_pcs_default = ARM_PCS_ATPCS;
1835 /* For arm2/3 there is no need to do any scheduling if there is only
1836 a floating point emulator, or we are doing software floating-point. */
1837 if ((TARGET_SOFT_FLOAT
1838 || (TARGET_FPA && arm_fpu_desc->rev))
1839 && (tune_flags & FL_MODE32) == 0)
1840 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1842 /* Use the cp15 method if it is available. */
1843 if (target_thread_pointer == TP_AUTO)
1845 if (arm_arch6k && !TARGET_THUMB1)
1846 target_thread_pointer = TP_CP15;
1848 target_thread_pointer = TP_SOFT;
1851 if (TARGET_HARD_TP && TARGET_THUMB1)
1852 error ("can not use -mtp=cp15 with 16-bit Thumb");
1854 /* Override the default structure alignment for AAPCS ABI. */
1855 if (!global_options_set.x_arm_structure_size_boundary)
1857 if (TARGET_AAPCS_BASED)
1858 arm_structure_size_boundary = 8;
1862 if (arm_structure_size_boundary != 8
1863 && arm_structure_size_boundary != 32
1864 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1866 if (ARM_DOUBLEWORD_ALIGN)
1868 "structure size boundary can only be set to 8, 32 or 64");
1870 warning (0, "structure size boundary can only be set to 8 or 32");
1871 arm_structure_size_boundary
1872 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
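  /* For illustration: with the old default of 32, a structure holding a
     single char is padded out to 4 bytes, whereas the AAPCS setting of 8
     lets it occupy exactly one byte.  */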
1876 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1878 error ("RTP PIC is incompatible with Thumb");
1882 /* If stack checking is disabled, we can use r10 as the PIC register,
1883 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1884 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1886 if (TARGET_VXWORKS_RTP)
1887 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1888 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1891 if (flag_pic && TARGET_VXWORKS_RTP)
1892 arm_pic_register = 9;
1894 if (arm_pic_register_string != NULL)
1896 int pic_register = decode_reg_name (arm_pic_register_string);
1899 warning (0, "-mpic-register= is useless without -fpic");
1901 /* Prevent the user from choosing an obviously stupid PIC register. */
1902 else if (pic_register < 0 || call_used_regs[pic_register]
1903 || pic_register == HARD_FRAME_POINTER_REGNUM
1904 || pic_register == STACK_POINTER_REGNUM
1905 || pic_register >= PC_REGNUM
1906 || (TARGET_VXWORKS_RTP
1907 && (unsigned int) pic_register != arm_pic_register))
1908 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1910 arm_pic_register = pic_register;
1913 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1914 if (fix_cm3_ldrd == 2)
1916 if (arm_selected_cpu->core == cortexm3)
1922 /* Enable -munaligned-access by default for
1923 - all ARMv6 architecture-based processors
1924 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1926 Disable -munaligned-access by default for
1927 - all pre-ARMv6 architecture-based processors
1928 - ARMv6-M architecture-based processors. */
1930 if (unaligned_access == 2)
1932 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1933 unaligned_access = 1;
1935 unaligned_access = 0;
1937 else if (unaligned_access == 1
1938 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1940 warning (0, "target CPU does not support unaligned accesses");
1941 unaligned_access = 0;
1944 if (TARGET_THUMB1 && flag_schedule_insns)
1946 /* Don't warn since it's on by default in -O2. */
1947 flag_schedule_insns = 0;
1952 /* If optimizing for size, bump the number of instructions that we
1953 are prepared to conditionally execute (even on a StrongARM). */
1954 max_insns_skipped = 6;
1957 max_insns_skipped = current_tune->max_insns_skipped;
1959 /* Hot/Cold partitioning is not currently supported, since we can't
1960 handle literal pool placement in that case. */
1961 if (flag_reorder_blocks_and_partition)
1963 inform (input_location,
1964 "-freorder-blocks-and-partition not supported on this architecture");
1965 flag_reorder_blocks_and_partition = 0;
1966 flag_reorder_blocks = 1;
1970 /* Hoisting PIC address calculations more aggressively provides a small,
1971 but measurable, size reduction for PIC code. Therefore, we decrease
1972 the bar for unrestricted expression hoisting to the cost of PIC address
1973 calculation, which is 2 instructions. */
1974 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1975 global_options.x_param_values,
1976 global_options_set.x_param_values);
1978 /* ARM EABI defaults to strict volatile bitfields. */
1979 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1980 flag_strict_volatile_bitfields = 1;
1982 /* Enable software prefetching at -O3 for CPUs that have prefetch, and when
1983 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
1984 if (flag_prefetch_loop_arrays < 0
1987 && current_tune->num_prefetch_slots > 0)
1988 flag_prefetch_loop_arrays = 1;
1990 /* Set up parameters to be used in the prefetching algorithm. Do not override
1991 the defaults unless we are tuning for a core whose values we have researched.  */
1992 if (current_tune->num_prefetch_slots > 0)
1993 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1994 current_tune->num_prefetch_slots,
1995 global_options.x_param_values,
1996 global_options_set.x_param_values);
1997 if (current_tune->l1_cache_line_size >= 0)
1998 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1999 current_tune->l1_cache_line_size,
2000 global_options.x_param_values,
2001 global_options_set.x_param_values);
2002 if (current_tune->l1_cache_size >= 0)
2003 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2004 current_tune->l1_cache_size,
2005 global_options.x_param_values,
2006 global_options_set.x_param_values);
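  /* These correspond to the user-visible knobs --param simultaneous-prefetches,
     --param l1-cache-line-size and --param l1-cache-size; maybe_set_param_value
     installs the tuning values only when the user has not set the parameter
     explicitly on the command line.  */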
2008 /* Register global variables with the garbage collector. */
2009 arm_add_gc_roots ();
2013 arm_add_gc_roots (void)
2015 gcc_obstack_init (&minipool_obstack);
2016 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2019 /* A table of known ARM exception types.
2020 For use with the interrupt function attribute. */
2024 const char *const arg;
2025 const unsigned long return_value;
2029 static const isr_attribute_arg isr_attribute_args [] =
2031 { "IRQ", ARM_FT_ISR },
2032 { "irq", ARM_FT_ISR },
2033 { "FIQ", ARM_FT_FIQ },
2034 { "fiq", ARM_FT_FIQ },
2035 { "ABORT", ARM_FT_ISR },
2036 { "abort", ARM_FT_ISR },
2037 { "ABORT", ARM_FT_ISR },
2038 { "abort", ARM_FT_ISR },
2039 { "UNDEF", ARM_FT_EXCEPTION },
2040 { "undef", ARM_FT_EXCEPTION },
2041 { "SWI", ARM_FT_EXCEPTION },
2042 { "swi", ARM_FT_EXCEPTION },
2043 { NULL, ARM_FT_NORMAL }
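/* For example, a (hypothetical) handler for the IRQ vector would be
   declared as:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   With no argument the attribute defaults to IRQ; see arm_isr_value.  */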
2046 /* Returns the (interrupt) function type of the current
2047 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2049 static unsigned long
2050 arm_isr_value (tree argument)
2052 const isr_attribute_arg * ptr;
2056 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2058 /* No argument - default to IRQ. */
2059 if (argument == NULL_TREE)
2060 return ARM_FT_ISR;
2062 /* Get the value of the argument. */
2063 if (TREE_VALUE (argument) == NULL_TREE
2064 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2065 return ARM_FT_UNKNOWN;
2067 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2069 /* Check it against the list of known arguments. */
2070 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2071 if (streq (arg, ptr->arg))
2072 return ptr->return_value;
2074 /* An unrecognized interrupt type. */
2075 return ARM_FT_UNKNOWN;
2078 /* Computes the type of the current function. */
2080 static unsigned long
2081 arm_compute_func_type (void)
2083 unsigned long type = ARM_FT_UNKNOWN;
2087 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2089 /* Decide if the current function is volatile. Such functions
2090 never return, and many memory cycles can be saved by not storing
2091 register values that will never be needed again. This optimization
2092 was added to speed up context switching in a kernel application. */
2094 && (TREE_NOTHROW (current_function_decl)
2095 || !(flag_unwind_tables
2097 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2098 && TREE_THIS_VOLATILE (current_function_decl))
2099 type |= ARM_FT_VOLATILE;
2101 if (cfun->static_chain_decl != NULL)
2102 type |= ARM_FT_NESTED;
2104 attr = DECL_ATTRIBUTES (current_function_decl);
2106 a = lookup_attribute ("naked", attr);
2108 type |= ARM_FT_NAKED;
2110 a = lookup_attribute ("isr", attr);
2112 a = lookup_attribute ("interrupt", attr);
2115 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2117 type |= arm_isr_value (TREE_VALUE (a));
2122 /* Returns the type of the current function. */
2125 arm_current_func_type (void)
2127 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2128 cfun->machine->func_type = arm_compute_func_type ();
2130 return cfun->machine->func_type;
2134 arm_allocate_stack_slots_for_args (void)
2136 /* Naked functions should not allocate stack slots for arguments. */
2137 return !IS_NAKED (arm_current_func_type ());
2141 /* Output assembler code for a block containing the constant parts
2142 of a trampoline, leaving space for the variable parts.
2144 On the ARM, (if r8 is the static chain regnum, and remembering that
2145 referencing pc adds an offset of 8) the trampoline looks like:
2146 ldr	r8, [pc, #0]
2147 ldr	pc, [pc, #0]
2148 .word static chain value
2149 .word function's address
2150 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2153 arm_asm_trampoline_template (FILE *f)
2157 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2158 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2160 else if (TARGET_THUMB2)
2162 /* The Thumb-2 trampoline is similar to the ARM implementation.
2163 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2164 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2165 STATIC_CHAIN_REGNUM, PC_REGNUM);
2166 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2170 ASM_OUTPUT_ALIGN (f, 2);
2171 fprintf (f, "\t.code\t16\n");
2172 fprintf (f, ".Ltrampoline_start:\n");
2173 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2174 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2175 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2176 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2177 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2178 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2180 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2181 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2184 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2187 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2189 rtx fnaddr, mem, a_tramp;
2191 emit_block_move (m_tramp, assemble_trampoline_template (),
2192 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2194 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2195 emit_move_insn (mem, chain_value);
2197 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2198 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2199 emit_move_insn (mem, fnaddr);
2201 a_tramp = XEXP (m_tramp, 0);
2202 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2203 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2204 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
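  /* As a sketch (assuming r8 is the static chain register, as in the
     layout comment above), the initialized 32-bit trampoline is:

	offset  0:  ldr  r8, [pc, #0]	(copied from the template)
	offset  4:  ldr  pc, [pc, #0]
	offset  8:  <static chain value>
	offset 12:  <function address>

     and the __clear_cache call keeps the I-cache coherent with it.  */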
2207 /* Thumb trampolines should be entered in thumb mode, so set
2208 the bottom bit of the address. */
2211 arm_trampoline_adjust_address (rtx addr)
2214 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2215 NULL, 0, OPTAB_LIB_WIDEN);
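  /* E.g. a trampoline placed at 0x8000 is returned as 0x8001 when
     compiling for Thumb, so an indirect call through bx enters it in
     Thumb state.  */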
2219 /* Return 1 if it is possible to return using a single instruction.
2220 If SIBLING is non-null, this is a test for a return before a sibling
2221 call. SIBLING is the call insn, so we can examine its register usage. */
2224 use_return_insn (int iscond, rtx sibling)
2227 unsigned int func_type;
2228 unsigned long saved_int_regs;
2229 unsigned HOST_WIDE_INT stack_adjust;
2230 arm_stack_offsets *offsets;
2232 /* Never use a return instruction before reload has run. */
2233 if (!reload_completed)
2236 func_type = arm_current_func_type ();
2238 /* Naked, volatile and stack alignment functions need special
2239 consideration. */
2240 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2243 /* So do interrupt functions that use the frame pointer and Thumb
2244 interrupt functions. */
2245 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2248 offsets = arm_get_frame_offsets ();
2249 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2251 /* As do variadic functions. */
2252 if (crtl->args.pretend_args_size
2253 || cfun->machine->uses_anonymous_args
2254 /* Or if the function calls __builtin_eh_return () */
2255 || crtl->calls_eh_return
2256 /* Or if the function calls alloca */
2257 || cfun->calls_alloca
2258 /* Or if there is a stack adjustment. However, if the stack pointer
2259 is saved on the stack, we can use a pre-incrementing stack load. */
2260 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2261 && stack_adjust == 4)))
2264 saved_int_regs = offsets->saved_regs_mask;
2266 /* Unfortunately, the insn
2268 ldmib sp, {..., sp, ...}
2270 triggers a bug on most SA-110 based devices, such that the stack
2271 pointer won't be correctly restored if the instruction takes a
2272 page fault. We work around this problem by popping r3 along with
2273 the other registers, since that is never slower than executing
2274 another instruction.
2276 We test for !arm_arch5 here, because code for any architecture
2277 less than this could potentially be run on one of the buggy
2278 chips. */
2279 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2281 /* Validate that r3 is a call-clobbered register (always true in
2282 the default abi) ... */
2283 if (!call_used_regs[3])
2286 /* ... that it isn't being used for a return value ... */
2287 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2290 /* ... or for a tail-call argument ... */
2293 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2295 if (find_regno_fusage (sibling, USE, 3))
2299 /* ... and that there are no call-saved registers in r0-r2
2300 (always true in the default ABI). */
2301 if (saved_int_regs & 0x7)
2305 /* Can't be done if interworking with Thumb, and any registers have been
2306 stacked. */
2307 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2310 /* On StrongARM, conditional returns are expensive if they aren't
2311 taken and multiple registers have been stacked. */
2312 if (iscond && arm_tune_strongarm)
2314 /* Conditional return when just the LR is stored is a simple
2315 conditional-load instruction, that's not expensive. */
2316 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2317 return 0;
2319 if (flag_pic
2320 && arm_pic_register != INVALID_REGNUM
2321 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2325 /* If there are saved registers but the LR isn't saved, then we need
2326 two instructions for the return. */
2327 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2330 /* Can't be done if any of the FPA regs are pushed,
2331 since this also requires an insn. */
2332 if (TARGET_HARD_FLOAT && TARGET_FPA)
2333 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2334 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2337 /* Likewise VFP regs. */
2338 if (TARGET_HARD_FLOAT && TARGET_VFP)
2339 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2340 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2343 if (TARGET_REALLY_IWMMXT)
2344 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2345 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2351 /* Return TRUE if int I is a valid immediate ARM constant. */
2354 const_ok_for_arm (HOST_WIDE_INT i)
2358 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2359 be all zero, or all one. */
2360 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2361 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2362 != ((~(unsigned HOST_WIDE_INT) 0)
2363 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2366 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2368 /* Fast return for 0 and small values. We must do this for zero, since
2369 the code below can't handle that one case. */
2370 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2373 /* Get the number of trailing zeros. */
2374 lowbit = ffs((int) i) - 1;
2376 /* Only even shifts are allowed in ARM mode so round down to the
2377 nearest even number. */
2381 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2386 /* Allow rotated constants in ARM mode. */
2388 && ((i & ~0xc000003f) == 0
2389 || (i & ~0xf000000f) == 0
2390 || (i & ~0xfc000003) == 0))
2397 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2400 if (i == v || i == (v | (v << 8)))
2403 /* Allow repeated pattern 0xXY00XY00. */
2413 /* Return true if I is a valid constant for the operation CODE. */
2415 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2417 if (const_ok_for_arm (i))
2423 /* See if we can use movw. */
2424 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2427 /* Otherwise, try mvn. */
2428 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2431 /* See if we can use addw or subw. */
2432 if (TARGET_THUMB2
2433 && ((i & 0xfffff000) == 0
2434 || ((-i) & 0xfffff000) == 0))
2436 /* else fall through. */
2456 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2458 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2464 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2468 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
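      /* For instance, AND with 0xffffff00 is not encodable directly, but
	 its complement 0xff is, so the test above accepts it and the
	 operation is later emitted as BIC #0xff.  */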
2475 /* Emit a sequence of insns to handle a large constant.
2476 CODE is the code of the operation required, it can be any of SET, PLUS,
2477 IOR, AND, XOR, MINUS;
2478 MODE is the mode in which the operation is being performed;
2479 VAL is the integer to operate on;
2480 SOURCE is the other operand (a register, or a null-pointer for SET);
2481 SUBTARGETS means it is safe to create scratch registers if that will
2482 either produce a simpler sequence, or we will want to cse the values.
2483 Return value is the number of insns emitted. */
2485 /* ??? Tweak this for thumb2. */
2487 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2488 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2492 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2493 cond = COND_EXEC_TEST (PATTERN (insn));
2497 if (subtargets || code == SET
2498 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2499 && REGNO (target) != REGNO (source)))
2501 /* After arm_reorg has been called, we can't fix up expensive
2502 constants by pushing them into memory so we must synthesize
2503 them in-line, regardless of the cost. This is only likely to
2504 be more costly on chips that have load delay slots and we are
2505 compiling without running the scheduler (so no splitting
2506 occurred before the final instruction emission).
2508 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2510 if (!after_arm_reorg
2512 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2514 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2519 /* Currently SET is the only monadic value for CODE; all
2520 the rest are dyadic. */
2521 if (TARGET_USE_MOVT)
2522 arm_emit_movpair (target, GEN_INT (val));
2524 emit_set_insn (target, GEN_INT (val));
2530 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2532 if (TARGET_USE_MOVT)
2533 arm_emit_movpair (temp, GEN_INT (val));
2535 emit_set_insn (temp, GEN_INT (val));
2537 /* For MINUS, the value is subtracted from, since we never
2538 have subtraction of a constant. */
2540 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2542 emit_set_insn (target,
2543 gen_rtx_fmt_ee (code, mode, source, temp));
2549 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2553 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2554 ARM/Thumb-2 immediates and add up to VAL.
2555 The function's return value gives the number of insns required. */
2557 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2558 struct four_ints *return_sequence)
2560 int best_consecutive_zeros = 0;
2564 struct four_ints tmp_sequence;
2566 /* If we aren't targeting ARM, the best place to start is always at
2567 the bottom; otherwise look more closely. */
2570 for (i = 0; i < 32; i += 2)
2572 int consecutive_zeros = 0;
2574 if (!(val & (3 << i)))
2576 while ((i < 32) && !(val & (3 << i)))
2578 consecutive_zeros += 2;
2581 if (consecutive_zeros > best_consecutive_zeros)
2583 best_consecutive_zeros = consecutive_zeros;
2584 best_start = i - consecutive_zeros;
2591 /* So long as it won't require any more insns to do so, it's
2592 desirable to emit a small constant (in bits 0...9) in the last
2593 insn. This way there is more chance that it can be combined with
2594 a later addressing insn to form a pre-indexed load or store
2595 operation. Consider:
2597 *((volatile int *)0xe0000100) = 1;
2598 *((volatile int *)0xe0000110) = 2;
2600 We want this to wind up as:
2604 str rB, [rA, #0x100]
2606 str rB, [rA, #0x110]
2608 rather than having to synthesize both large constants from scratch.
2610 Therefore, we calculate how many insns would be required to emit
2611 the constant starting from `best_start', and also starting from
2612 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2613 yield a shorter sequence, we may as well use zero. */
2614 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2616 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2618 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2619 if (insns2 <= insns1)
2621 *return_sequence = tmp_sequence;
2629 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2631 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2632 struct four_ints *return_sequence, int i)
2634 int remainder = val & 0xffffffff;
2637 /* Try to find a way of doing the job in either two or three
2638 instructions.
2640 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2641 location. We start at position I. This may be the MSB, or
2642 optimal_immediate_sequence may have positioned it at the largest block
2643 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2644 wrapping around to the top of the word when we drop off the bottom.
2645 In the worst case this code should produce no more than four insns.
2647 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2648 constants, shifted to any arbitrary location. We should always start
2649 at position 0. */
2653 unsigned int b1, b2, b3, b4;
2654 unsigned HOST_WIDE_INT result;
2657 gcc_assert (insns < 4);
2662 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2663 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2666 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2667 /* We can use addw/subw for the last 12 bits. */
2671 /* Use an 8-bit shifted/rotated immediate. */
2675 result = remainder & ((0x0ff << end)
2676 | ((i < end) ? (0xff >> (32 - end))
2683 /* ARM allows rotates by a multiple of two. Thumb-2 allows
2684 arbitrary shifts. */
2685 i -= TARGET_ARM ? 2 : 1;
2689 /* Next, see if we can do a better job with a thumb2 replicated
2692 We do it this way around to catch the cases like 0x01F001E0 where
2693 two 8-bit immediates would work, but a replicated constant would
2696 TODO: 16-bit constants that don't clear all the bits, but still win.
2697 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2700 b1 = (remainder & 0xff000000) >> 24;
2701 b2 = (remainder & 0x00ff0000) >> 16;
2702 b3 = (remainder & 0x0000ff00) >> 8;
2703 b4 = remainder & 0xff;
2707 /* The 8-bit immediate already found clears b1 (and maybe b2),
2708 but must leave b3 and b4 alone. */
2710 /* First try to find a 32-bit replicated constant that clears
2711 almost everything. We can assume that we can't do it in one,
2712 or else we wouldn't be here. */
2713 unsigned int tmp = b1 & b2 & b3 & b4;
2714 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2716 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2717 + (tmp == b3) + (tmp == b4);
2719 && (matching_bytes >= 3
2720 || (matching_bytes == 2
2721 && const_ok_for_op (remainder & ~tmp2, code))))
2723 /* At least 3 of the bytes match, and the fourth has at
2724 least as many bits set, or two of the bytes match
2725 and it will only require one more insn to finish. */
2733 /* Second, try to find a 16-bit replicated constant that can
2734 leave three of the bytes clear. If b2 or b4 is already
2735 zero, then we can. If the 8-bit immediate from above would not
2736 clear b2 anyway, then we still win. */
2737 else if (b1 == b3 && (!b2 || !b4
2738 || (remainder & 0x00ff0000 & ~result)))
2740 result = remainder & 0xff00ff00;
2746 /* The 8-bit immediate already found clears b2 (and maybe b3)
2747 and we don't get here unless b1 is already clear, but it will
2748 leave b4 unchanged. */
2750 /* If we can clear b2 and b4 at once, then we win, since the
2751 8-bits couldn't possibly reach that far. */
2754 result = remainder & 0x00ff00ff;
2760 return_sequence->i[insns++] = result;
2761 remainder &= ~result;
2763 if (code == SET || code == MINUS)
2771 /* Emit an instruction with the indicated PATTERN. If COND is
2772 non-NULL, conditionalize the execution of the instruction on COND
2776 emit_constant_insn (rtx cond, rtx pattern)
2779 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2780 emit_insn (pattern);
2783 /* As above, but extra parameter GENERATE which, if clear, suppresses
2787 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2788 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2793 int final_invert = 0;
2795 int set_sign_bit_copies = 0;
2796 int clear_sign_bit_copies = 0;
2797 int clear_zero_bit_copies = 0;
2798 int set_zero_bit_copies = 0;
2799 int insns = 0, neg_insns, inv_insns;
2800 unsigned HOST_WIDE_INT temp1, temp2;
2801 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2802 struct four_ints *immediates;
2803 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2805 /* Find out which operations are safe for a given CODE. Also do a quick
2806 check for degenerate cases; these can occur when DImode operations
2819 if (remainder == 0xffffffff)
2822 emit_constant_insn (cond,
2823 gen_rtx_SET (VOIDmode, target,
2824 GEN_INT (ARM_SIGN_EXTEND (val))));
2830 if (reload_completed && rtx_equal_p (target, source))
2834 emit_constant_insn (cond,
2835 gen_rtx_SET (VOIDmode, target, source));
2844 emit_constant_insn (cond,
2845 gen_rtx_SET (VOIDmode, target, const0_rtx));
2848 if (remainder == 0xffffffff)
2850 if (reload_completed && rtx_equal_p (target, source))
2853 emit_constant_insn (cond,
2854 gen_rtx_SET (VOIDmode, target, source));
2863 if (reload_completed && rtx_equal_p (target, source))
2866 emit_constant_insn (cond,
2867 gen_rtx_SET (VOIDmode, target, source));
2871 if (remainder == 0xffffffff)
2874 emit_constant_insn (cond,
2875 gen_rtx_SET (VOIDmode, target,
2876 gen_rtx_NOT (mode, source)));
2883 /* We treat MINUS as (val - source), since (source - val) is always
2884 passed as (source + (-val)). */
2888 emit_constant_insn (cond,
2889 gen_rtx_SET (VOIDmode, target,
2890 gen_rtx_NEG (mode, source)));
2893 if (const_ok_for_arm (val))
2896 emit_constant_insn (cond,
2897 gen_rtx_SET (VOIDmode, target,
2898 gen_rtx_MINUS (mode, GEN_INT (val),
2909 /* If we can do it in one insn get out quickly. */
2910 if (const_ok_for_op (val, code))
2913 emit_constant_insn (cond,
2914 gen_rtx_SET (VOIDmode, target,
2916 ? gen_rtx_fmt_ee (code, mode, source,
2922 /* Calculate a few attributes that may be useful for specific
2924 /* Count number of leading zeros. */
2925 for (i = 31; i >= 0; i--)
2927 if ((remainder & (1 << i)) == 0)
2928 clear_sign_bit_copies++;
2933 /* Count number of leading 1's. */
2934 for (i = 31; i >= 0; i--)
2936 if ((remainder & (1 << i)) != 0)
2937 set_sign_bit_copies++;
2942 /* Count number of trailing zero's. */
2943 for (i = 0; i <= 31; i++)
2945 if ((remainder & (1 << i)) == 0)
2946 clear_zero_bit_copies++;
2951 /* Count number of trailing 1's. */
2952 for (i = 0; i <= 31; i++)
2954 if ((remainder & (1 << i)) != 0)
2955 set_zero_bit_copies++;
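  /* Worked example: for remainder == 0xfff00000 these loops yield
     clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
     clear_zero_bit_copies == 20 and set_zero_bit_copies == 0.  */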
2963 /* See if we can do this by sign_extending a constant that is known
2964 to be negative. This is a good way of doing it, since the shift
2965 may well merge into a subsequent insn. */
2966 if (set_sign_bit_copies > 1)
2968 if (const_ok_for_arm
2969 (temp1 = ARM_SIGN_EXTEND (remainder
2970 << (set_sign_bit_copies - 1))))
2974 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2975 emit_constant_insn (cond,
2976 gen_rtx_SET (VOIDmode, new_src,
2978 emit_constant_insn (cond,
2979 gen_ashrsi3 (target, new_src,
2980 GEN_INT (set_sign_bit_copies - 1)));
2984 /* For an inverted constant, we will need to set the low bits,
2985 these will be shifted out of harm's way. */
2986 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2987 if (const_ok_for_arm (~temp1))
2991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2992 emit_constant_insn (cond,
2993 gen_rtx_SET (VOIDmode, new_src,
2995 emit_constant_insn (cond,
2996 gen_ashrsi3 (target, new_src,
2997 GEN_INT (set_sign_bit_copies - 1)));
3003 /* See if we can calculate the value as the difference between two
3004 valid immediates. */
3005 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3007 int topshift = clear_sign_bit_copies & ~1;
3009 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3010 & (0xff000000 >> topshift));
3012 /* If temp1 is zero, then that means the 9 most significant
3013 bits of remainder were 1 and we've caused it to overflow.
3014 When topshift is 0 we don't need to do anything since we
3015 can borrow from 'bit 32'. */
3016 if (temp1 == 0 && topshift != 0)
3017 temp1 = 0x80000000 >> (topshift - 1);
3019 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3021 if (const_ok_for_arm (temp2))
3025 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3026 emit_constant_insn (cond,
3027 gen_rtx_SET (VOIDmode, new_src,
3029 emit_constant_insn (cond,
3030 gen_addsi3 (target, new_src,
3038 /* See if we can generate this by setting the bottom (or the top)
3039 16 bits, and then shifting these into the other half of the
3040 word. We only look for the simplest cases, to do more would cost
3041 too much. Be careful, however, not to generate this when the
3042 alternative would take fewer insns. */
3043 if (val & 0xffff0000)
3045 temp1 = remainder & 0xffff0000;
3046 temp2 = remainder & 0x0000ffff;
3048 /* Overlaps outside this range are best done using other methods. */
3049 for (i = 9; i < 24; i++)
3051 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3052 && !const_ok_for_arm (temp2))
3054 rtx new_src = (subtargets
3055 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3057 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3058 source, subtargets, generate);
3066 gen_rtx_ASHIFT (mode, source,
3073 /* Don't duplicate cases already considered. */
3074 for (i = 17; i < 24; i++)
3076 if (((temp1 | (temp1 >> i)) == remainder)
3077 && !const_ok_for_arm (temp1))
3079 rtx new_src = (subtargets
3080 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3082 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3083 source, subtargets, generate);
3088 gen_rtx_SET (VOIDmode, target,
3091 gen_rtx_LSHIFTRT (mode, source,
3102 /* If we have IOR or XOR, and the constant can be loaded in a
3103 single instruction, and we can find a temporary to put it in,
3104 then this can be done in two instructions instead of 3-4. */
3106 /* TARGET can't be NULL if SUBTARGETS is 0 */
3107 || (reload_completed && !reg_mentioned_p (target, source)))
3109 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3113 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3115 emit_constant_insn (cond,
3116 gen_rtx_SET (VOIDmode, sub,
3118 emit_constant_insn (cond,
3119 gen_rtx_SET (VOIDmode, target,
3120 gen_rtx_fmt_ee (code, mode,
3131 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3132 and the remainder 0s for e.g. 0xfff00000)
3133 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3135 This can be done in 2 instructions by using shifts with mov or mvn.
3140 mvn r0, r0, lsr #12 */
3141 if (set_sign_bit_copies > 8
3142 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3146 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3147 rtx shift = GEN_INT (set_sign_bit_copies);
3151 gen_rtx_SET (VOIDmode, sub,
3153 gen_rtx_ASHIFT (mode,
3158 gen_rtx_SET (VOIDmode, target,
3160 gen_rtx_LSHIFTRT (mode, sub,
3167 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3169 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3171 E.g. r0 = r0 | 0xfff
3176 if (set_zero_bit_copies > 8
3177 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3181 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3182 rtx shift = GEN_INT (set_zero_bit_copies);
3186 gen_rtx_SET (VOIDmode, sub,
3188 gen_rtx_LSHIFTRT (mode,
3193 gen_rtx_SET (VOIDmode, target,
3195 gen_rtx_ASHIFT (mode, sub,
3201 /* This will never be reached for Thumb2 because orn is a valid
3202 instruction. This is for Thumb1 and the ARM 32 bit cases.
3204 x = y | constant (such that ~constant is a valid constant)
3206 x = ~(~y & ~constant).
3208 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3212 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3213 emit_constant_insn (cond,
3214 gen_rtx_SET (VOIDmode, sub,
3215 gen_rtx_NOT (mode, source)));
3218 sub = gen_reg_rtx (mode);
3219 emit_constant_insn (cond,
3220 gen_rtx_SET (VOIDmode, sub,
3221 gen_rtx_AND (mode, source,
3223 emit_constant_insn (cond,
3224 gen_rtx_SET (VOIDmode, target,
3225 gen_rtx_NOT (mode, sub)));
3232 /* See if two shifts will do 2 or more insn's worth of work. */
3233 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3235 HOST_WIDE_INT shift_mask = ((0xffffffff
3236 << (32 - clear_sign_bit_copies))
3239 if ((remainder | shift_mask) != 0xffffffff)
3243 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3244 insns = arm_gen_constant (AND, mode, cond,
3245 remainder | shift_mask,
3246 new_src, source, subtargets, 1);
3251 rtx targ = subtargets ? NULL_RTX : target;
3252 insns = arm_gen_constant (AND, mode, cond,
3253 remainder | shift_mask,
3254 targ, source, subtargets, 0);
3260 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3261 rtx shift = GEN_INT (clear_sign_bit_copies);
3263 emit_insn (gen_ashlsi3 (new_src, source, shift));
3264 emit_insn (gen_lshrsi3 (target, new_src, shift));
3270 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3272 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3274 if ((remainder | shift_mask) != 0xffffffff)
3278 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3280 insns = arm_gen_constant (AND, mode, cond,
3281 remainder | shift_mask,
3282 new_src, source, subtargets, 1);
3287 rtx targ = subtargets ? NULL_RTX : target;
3289 insns = arm_gen_constant (AND, mode, cond,
3290 remainder | shift_mask,
3291 targ, source, subtargets, 0);
3297 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3298 rtx shift = GEN_INT (clear_zero_bit_copies);
3300 emit_insn (gen_lshrsi3 (new_src, source, shift));
3301 emit_insn (gen_ashlsi3 (target, new_src, shift));
3313 /* Calculate what the instruction sequences would be if we generated it
3314 normally, negated, or inverted. */
3316 /* AND cannot be split into multiple insns, so invert and use BIC. */
3319 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3322 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3327 if (can_invert || final_invert)
3328 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3333 immediates = &pos_immediates;
3335 /* Is the negated immediate sequence more efficient? */
3336 if (neg_insns < insns && neg_insns <= inv_insns)
3339 immediates = &neg_immediates;
3344 /* Is the inverted immediate sequence more efficient?
3345 We must allow for an extra NOT instruction for XOR operations, although
3346 there is some chance that the final 'mvn' will get optimized later. */
3347 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3350 immediates = &inv_immediates;
3358 /* Now output the chosen sequence as instructions. */
3361 for (i = 0; i < insns; i++)
3363 rtx new_src, temp1_rtx;
3365 temp1 = immediates->i[i];
3367 if (code == SET || code == MINUS)
3368 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3369 else if ((final_invert || i < (insns - 1)) && subtargets)
3370 new_src = gen_reg_rtx (mode);
3376 else if (can_negate)
3379 temp1 = trunc_int_for_mode (temp1, mode);
3380 temp1_rtx = GEN_INT (temp1);
3384 else if (code == MINUS)
3385 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3387 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3389 emit_constant_insn (cond,
3390 gen_rtx_SET (VOIDmode, new_src,
3396 can_negate = can_invert;
3400 else if (code == MINUS)
3408 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3409 gen_rtx_NOT (mode, source)));
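  /* End-to-end example: a SET of 0x12345678 in ARM state costs four
     insns,

	mov  rd, #0x12000000
	orr  rd, rd, #0x00340000
	orr  rd, rd, #0x00005600
	orr  rd, rd, #0x78

     since every chunk is an 8-bit value at an even rotation and neither
     the negated nor the inverted sequence is shorter here.  */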
3416 /* Canonicalize a comparison so that we are more likely to recognize it.
3417 This can be done for a few constant compares, where we can make the
3418 immediate value easier to load. */
3421 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3423 enum machine_mode mode;
3424 unsigned HOST_WIDE_INT i, maxval;
3426 mode = GET_MODE (*op0);
3427 if (mode == VOIDmode)
3428 mode = GET_MODE (*op1);
3430 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3432 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3433 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3434 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3435 for GTU/LEU in Thumb mode. */
3440 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3441 available. */
3442 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3445 if (code == GT || code == LE
3446 || (!TARGET_ARM && (code == GTU || code == LEU)))
3448 /* Missing comparison. First try to use an available
3449 comparison. */
3450 if (GET_CODE (*op1) == CONST_INT)
3458 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3460 *op1 = GEN_INT (i + 1);
3461 return code == GT ? GE : LT;
3466 if (i != ~((unsigned HOST_WIDE_INT) 0)
3467 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3469 *op1 = GEN_INT (i + 1);
3470 return code == GTU ? GEU : LTU;
3478 /* If that did not work, reverse the condition. */
3482 return swap_condition (code);
3488 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3489 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3490 to facilitate possible combining with a cmp into 'ands'. */
3492 && GET_CODE (*op0) == ZERO_EXTEND
3493 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3494 && GET_MODE (XEXP (*op0, 0)) == QImode
3495 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3496 && subreg_lowpart_p (XEXP (*op0, 0))
3497 && *op1 == const0_rtx)
3498 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3501 /* Comparisons smaller than DImode. Only adjust comparisons against
3502 an out-of-range constant. */
3503 if (GET_CODE (*op1) != CONST_INT
3504 || const_ok_for_arm (INTVAL (*op1))
3505 || const_ok_for_arm (- INTVAL (*op1)))
3519 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3521 *op1 = GEN_INT (i + 1);
3522 return code == GT ? GE : LT;
3529 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3531 *op1 = GEN_INT (i - 1);
3532 return code == GE ? GT : LE;
3538 if (i != ~((unsigned HOST_WIDE_INT) 0)
3539 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3541 *op1 = GEN_INT (i + 1);
3542 return code == GTU ? GEU : LTU;
3549 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3551 *op1 = GEN_INT (i - 1);
3552 return code == GEU ? GTU : LEU;
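  /* For example, (LE reg 0xffffff) cannot be tested directly because
     0xffffff is not a valid immediate, but 0x1000000 is, so the code
     above rewrites it as the equivalent (LT reg 0x1000000).  */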
3564 /* Define how to find the value returned by a function. */
3567 arm_function_value(const_tree type, const_tree func,
3568 bool outgoing ATTRIBUTE_UNUSED)
3570 enum machine_mode mode;
3571 int unsignedp ATTRIBUTE_UNUSED;
3572 rtx r ATTRIBUTE_UNUSED;
3574 mode = TYPE_MODE (type);
3576 if (TARGET_AAPCS_BASED)
3577 return aapcs_allocate_return_reg (mode, type, func);
3579 /* Promote integer types. */
3580 if (INTEGRAL_TYPE_P (type))
3581 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3583 /* Promote small structs returned in a register to full-word size
3584 for big-endian AAPCS. */
3585 if (arm_return_in_msb (type))
3587 HOST_WIDE_INT size = int_size_in_bytes (type);
3588 if (size % UNITS_PER_WORD != 0)
3590 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3591 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3595 return LIBCALL_VALUE (mode);
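  /* When the MSB path applies (big-endian AAPCS), e.g. a 3-byte
     structure is widened to 4 bytes here, picking SImode, so that its
     contents end up in the most significant end of r0.  */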
3599 libcall_eq (const void *p1, const void *p2)
3601 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3605 libcall_hash (const void *p1)
3607 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3611 add_libcall (htab_t htab, rtx libcall)
3613 *htab_find_slot (htab, libcall, INSERT) = libcall;
3617 arm_libcall_uses_aapcs_base (const_rtx libcall)
3619 static bool init_done = false;
3620 static htab_t libcall_htab;
3626 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3628 add_libcall (libcall_htab,
3629 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3630 add_libcall (libcall_htab,
3631 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3632 add_libcall (libcall_htab,
3633 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3634 add_libcall (libcall_htab,
3635 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3637 add_libcall (libcall_htab,
3638 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3639 add_libcall (libcall_htab,
3640 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3641 add_libcall (libcall_htab,
3642 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3643 add_libcall (libcall_htab,
3644 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3650 add_libcall (libcall_htab,
3651 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3652 add_libcall (libcall_htab,
3653 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3654 add_libcall (libcall_htab,
3655 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3656 add_libcall (libcall_htab,
3657 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3659 /* Values from double-precision helper functions are returned in core
3660 registers if the selected core only supports single-precision
3661 arithmetic, even if we are using the hard-float ABI. The same is
3662 true for single-precision helpers, but we will never be using the
3663 hard-float ABI on a CPU which doesn't support single-precision
3664 operations in hardware. */
3665 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3666 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3667 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3668 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3669 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3670 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3671 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3672 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3673 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3674 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3675 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3676 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3678 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3682 return libcall && htab_find (libcall_htab, libcall) != NULL;
3686 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3688 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3689 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3691 /* The following libcalls return their result in integer registers,
3692 even though they return a floating point value. */
3693 if (arm_libcall_uses_aapcs_base (libcall))
3694 return gen_rtx_REG (mode, ARG_REGISTER(1));
3698 return LIBCALL_VALUE (mode);
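/* Concretely: on a hard-float AAPCS target whose FPU is single-precision
   only (e.g. -mfpu=fpv4-sp-d16), a DFmode addition goes through a helper
   in the table above, so its result is fetched from r0/r1 rather than
   from a VFP register.  */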
3701 /* Determine the amount of memory needed to store the possible return
3702 registers of an untyped call. */
3704 arm_apply_result_size (void)
3710 if (TARGET_HARD_FLOAT_ABI)
3716 if (TARGET_MAVERICK)
3719 if (TARGET_IWMMXT_ABI)
3726 /* Decide whether TYPE should be returned in memory (true)
3727 or in a register (false). FNTYPE is the type of the function making
3730 arm_return_in_memory (const_tree type, const_tree fntype)
3734 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3736 if (TARGET_AAPCS_BASED)
3738 /* Simple, non-aggregate types (i.e. not including vectors and
3739 complex) are always returned in a register (or registers).
3740 We don't care about which register here, so we can short-cut
3741 some of the detail. */
3742 if (!AGGREGATE_TYPE_P (type)
3743 && TREE_CODE (type) != VECTOR_TYPE
3744 && TREE_CODE (type) != COMPLEX_TYPE)
3747 /* Any return value that is no larger than one word can be
3748 returned in r0. */
3749 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3752 /* Check any available co-processors to see if they accept the
3753 type as a register candidate (VFP, for example, can return
3754 some aggregates in consecutive registers). These aren't
3755 available if the call is variadic. */
3756 if (aapcs_select_return_coproc (type, fntype) >= 0)
3759 /* Vector values should be returned using ARM registers, not
3760 memory (unless they're over 16 bytes, which will break since
3761 we only have four call-clobbered registers to play with). */
3762 if (TREE_CODE (type) == VECTOR_TYPE)
3763 return (size < 0 || size > (4 * UNITS_PER_WORD));
3765 /* The rest go in memory. */
3769 if (TREE_CODE (type) == VECTOR_TYPE)
3770 return (size < 0 || size > (4 * UNITS_PER_WORD));
3772 if (!AGGREGATE_TYPE_P (type)
3773 && (TREE_CODE (type) != VECTOR_TYPE))
3774 /* All simple types are returned in registers. */
3777 if (arm_abi != ARM_ABI_APCS)
3779 /* ATPCS and later return aggregate types in memory only if they are
3780 larger than a word (or are variable size). */
3781 return (size < 0 || size > UNITS_PER_WORD);
3784 /* For the arm-wince targets we choose to be compatible with Microsoft's
3785 ARM and Thumb compilers, which always return aggregates in memory. */
3787 /* All structures/unions bigger than one word are returned in memory.
3788 Also catch the case where int_size_in_bytes returns -1. In this case
3789 the aggregate is either huge or of variable size, and in either case
3790 we will want to return it via memory and not in a register. */
3791 if (size < 0 || size > UNITS_PER_WORD)
3794 if (TREE_CODE (type) == RECORD_TYPE)
3798 /* For a struct the APCS says that we only return in a register
3799 if the type is 'integer like' and every addressable element
3800 has an offset of zero. For practical purposes this means
3801 that the structure can have at most one non bit-field element
3802 and that this element must be the first one in the structure. */
3804 /* Find the first field, ignoring non FIELD_DECL things which will
3805 have been created by C++. */
3806 for (field = TYPE_FIELDS (type);
3807 field && TREE_CODE (field) != FIELD_DECL;
3808 field = DECL_CHAIN (field))
3812 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3814 /* Check that the first field is valid for returning in a register. */
3816 /* ... Floats are not allowed */
3817 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3820 /* ... Aggregates that are not themselves valid for returning in
3821 a register are not allowed. */
3822 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3825 /* Now check the remaining fields, if any. Only bitfields are allowed,
3826 since they are not addressable. */
3827 for (field = DECL_CHAIN (field);
3829 field = DECL_CHAIN (field))
3831 if (TREE_CODE (field) != FIELD_DECL)
3834 if (!DECL_BIT_FIELD_TYPE (field))
3841 if (TREE_CODE (type) == UNION_TYPE)
3845 /* Unions can be returned in registers if every element is
3846 integral, or can be returned in an integer register. */
3847 for (field = TYPE_FIELDS (type);
3849 field = DECL_CHAIN (field))
3851 if (TREE_CODE (field) != FIELD_DECL)
3854 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3857 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3863 #endif /* not ARM_WINCE */
3865 /* Return all other types in memory. */
3869 /* Indicate whether or not words of a double are in big-endian order. */
3872 arm_float_words_big_endian (void)
3874 if (TARGET_MAVERICK)
3877 /* For FPA, float words are always big-endian. For VFP, float words
3878 follow the memory system mode. */
3886 return (TARGET_BIG_END ? 1 : 0);
3891 const struct pcs_attribute_arg
3895 } pcs_attribute_args[] =
3897 {"aapcs", ARM_PCS_AAPCS},
3898 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3900 /* We could recognize these, but changes would be needed elsewhere
3901 to implement them. */
3902 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3903 {"atpcs", ARM_PCS_ATPCS},
3904 {"apcs", ARM_PCS_APCS},
3906 {NULL, ARM_PCS_UNKNOWN}
3910 arm_pcs_from_attribute (tree attr)
3912 const struct pcs_attribute_arg *ptr;
3915 /* Get the value of the argument. */
3916 if (TREE_VALUE (attr) == NULL_TREE
3917 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3918 return ARM_PCS_UNKNOWN;
3920 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3922 /* Check it against the list of known arguments. */
3923 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3924 if (streq (arg, ptr->arg))
3927 /* An unrecognized PCS variant. */
3928 return ARM_PCS_UNKNOWN;
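/* For example, an individual function can be pinned to the base variant
   with:

     double f (double) __attribute__ ((pcs ("aapcs")));

   which keeps its argument and result in core registers even on a
   VFP-enabled target.  */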
3931 /* Get the PCS variant to use for this call. TYPE is the function's type
3932 specification, DECL is the specific declaration. DECL may be null if
3933 the call could be indirect or if this is a library call. */
3935 arm_get_pcs_model (const_tree type, const_tree decl)
3937 bool user_convention = false;
3938 enum arm_pcs user_pcs = arm_pcs_default;
3943 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3946 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3947 user_convention = true;
3950 if (TARGET_AAPCS_BASED)
3952 /* Detect varargs functions. These always use the base rules
3953 (no argument is ever a candidate for a co-processor
3954 register). */
3955 bool base_rules = stdarg_p (type);
3957 if (user_convention)
3959 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3960 sorry ("non-AAPCS derived PCS variant");
3961 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3962 error ("variadic functions must use the base AAPCS variant");
3966 return ARM_PCS_AAPCS;
3967 else if (user_convention)
3969 else if (decl && flag_unit_at_a_time)
3971 /* Local functions never leak outside this compilation unit,
3972 so we are free to use whatever conventions are
3973 appropriate. */
3974 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3975 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3977 return ARM_PCS_AAPCS_LOCAL;
3980 else if (user_convention && user_pcs != arm_pcs_default)
3981 sorry ("PCS variant");
3983 /* For everything else we use the target's default. */
3984 return arm_pcs_default;
3989 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3990 const_tree fntype ATTRIBUTE_UNUSED,
3991 rtx libcall ATTRIBUTE_UNUSED,
3992 const_tree fndecl ATTRIBUTE_UNUSED)
3994 /* Record the unallocated VFP registers. */
3995 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3996 pcum->aapcs_vfp_reg_alloc = 0;
3999 /* Walk down the type tree of TYPE counting consecutive base elements.
4000 If *MODEP is VOIDmode, then set it to the first valid floating point
4001 type. If a non-floating point type is found, or if a floating point
4002 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4003 otherwise return the count in the sub-tree. */
4005 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4007 enum machine_mode mode;
4010 switch (TREE_CODE (type))
4013 mode = TYPE_MODE (type);
4014 if (mode != DFmode && mode != SFmode)
4017 if (*modep == VOIDmode)
4026 mode = TYPE_MODE (TREE_TYPE (type));
4027 if (mode != DFmode && mode != SFmode)
4030 if (*modep == VOIDmode)
4039 /* Use V2SImode and V4SImode as representatives of all 64-bit
4040 and 128-bit vector types, whether or not those modes are
4041 supported with the present options. */
4042 size = int_size_in_bytes (type);
4055 if (*modep == VOIDmode)
4058 /* Vector modes are considered to be opaque: two vectors are
4059 equivalent for the purposes of being homogeneous aggregates
4060 if they are the same size. */
4069 tree index = TYPE_DOMAIN (type);
4071 /* Can't handle incomplete types. */
4072 if (!COMPLETE_TYPE_P(type))
4075 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4078 || !TYPE_MAX_VALUE (index)
4079 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4080 || !TYPE_MIN_VALUE (index)
4081 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4085 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4086 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4088 /* There must be no padding. */
4089 if (!host_integerp (TYPE_SIZE (type), 1)
4090 || (tree_low_cst (TYPE_SIZE (type), 1)
4091 != count * GET_MODE_BITSIZE (*modep)))
4103 /* Can't handle incomplete types. */
4104 if (!COMPLETE_TYPE_P(type))
4107 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4109 if (TREE_CODE (field) != FIELD_DECL)
4112 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4118 /* There must be no padding. */
4119 if (!host_integerp (TYPE_SIZE (type), 1)
4120 || (tree_low_cst (TYPE_SIZE (type), 1)
4121 != count * GET_MODE_BITSIZE (*modep)))
4128 case QUAL_UNION_TYPE:
4130 /* These aren't very interesting except in a degenerate case. */
4135 /* Can't handle incomplete types. */
4136 if (!COMPLETE_TYPE_P(type))
4139 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4141 if (TREE_CODE (field) != FIELD_DECL)
4144 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4147 count = count > sub_count ? count : sub_count;
4150 /* There must be no padding. */
4151 if (!host_integerp (TYPE_SIZE (type), 1)
4152 || (tree_low_cst (TYPE_SIZE (type), 1)
4153 != count * GET_MODE_BITSIZE (*modep)))
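/* Worked example for aapcs_vfp_sub_candidate (an illustrative
   sketch):

     struct hfa { float x, y, z; };

   Walking this RECORD_TYPE finds three SFmode fields with no
   padding, so *modep is set to SFmode and 3 is returned: the struct
   is a homogeneous floating-point aggregate eligible for VFP
   argument registers.  Adding an int member, or mixing float with
   double, makes the walk return -1 and the argument falls back to
   the core-register rules.  */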
4166 /* Return true if PCS_VARIANT should use VFP registers. */
4168 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4170 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4172 static bool seen_thumb1_vfp = false;
4174 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4176 sorry ("Thumb-1 hard-float VFP ABI");
4177 /* sorry() is not immediately fatal, so only display this once. */
4178 seen_thumb1_vfp = true;
4184 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
        && (TARGET_VFP_DOUBLE || !is_double));
4192 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4193 enum machine_mode mode, const_tree type,
4194 enum machine_mode *base_mode, int *count)
4196 enum machine_mode new_mode = VOIDmode;
4198 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4199 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4200 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4205 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4208 new_mode = (mode == DCmode ? DFmode : SFmode);
4210 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4212 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4214 if (ag_count > 0 && ag_count <= 4)
4223 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4226 *base_mode = new_mode;
4231 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4232 enum machine_mode mode, const_tree type)
4234 int count ATTRIBUTE_UNUSED;
4235 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4237 if (!use_vfp_abi (pcs_variant, false))
4239 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4244 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4247 if (!use_vfp_abi (pcum->pcs_variant, false))
4250 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4251 &pcum->aapcs_vfp_rmode,
4252 &pcum->aapcs_vfp_rcount);
4256 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4257 const_tree type ATTRIBUTE_UNUSED)
4259 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4260 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4263 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4264 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4266 pcum->aapcs_vfp_reg_alloc = mask << regno;
4267 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4270 int rcount = pcum->aapcs_vfp_rcount;
4272 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4276 /* Avoid using unsupported vector modes. */
4277 if (rmode == V2SImode)
4279 else if (rmode == V4SImode)
4286 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4287 for (i = 0; i < rcount; i++)
4289 rtx tmp = gen_rtx_REG (rmode,
4290 FIRST_VFP_REGNUM + regno + i * rshift);
tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4293 GEN_INT (i * GET_MODE_SIZE (rmode)));
4294 XVECEXP (par, 0, i) = tmp;
4297 pcum->aapcs_reg = par;
4300 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
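/* A worked example of the allocation loop above (illustrative): for
   a single DFmode argument, SHIFT is 8/4 == 2 and MASK is 0x3, so
   the loop probes s0/s1, s2/s3, ... and claims the first free pair.
   Candidates therefore always start on an even S-register boundary,
   as the VFP variant of the AAPCS requires for doubles.  */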
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
                               enum machine_mode mode,
                               const_tree type)
4311 if (!use_vfp_abi (pcs_variant, false))
4314 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4317 enum machine_mode ag_mode;
4322 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4327 if (ag_mode == V2SImode)
4329 else if (ag_mode == V4SImode)
4335 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4336 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4337 for (i = 0; i < count; i++)
4339 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4340 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4341 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4342 XVECEXP (par, 0, i) = tmp;
4348 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4352 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4353 enum machine_mode mode ATTRIBUTE_UNUSED,
4354 const_tree type ATTRIBUTE_UNUSED)
4356 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4357 pcum->aapcs_vfp_reg_alloc = 0;
4361 #define AAPCS_CP(X) \
4363 aapcs_ ## X ## _cum_init, \
4364 aapcs_ ## X ## _is_call_candidate, \
4365 aapcs_ ## X ## _allocate, \
4366 aapcs_ ## X ## _is_return_candidate, \
4367 aapcs_ ## X ## _allocate_return_reg, \
4368 aapcs_ ## X ## _advance \
4371 /* Table of co-processors that can be used to pass arguments in
registers. Ideally no argument should be a candidate for more than
4373 one co-processor table entry, but the table is processed in order
4374 and stops after the first match. If that entry then fails to put
the argument into a co-processor register, the argument will go on the stack. */
4379 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4380 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4382 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4383 BLKmode) is a candidate for this co-processor's registers; this
4384 function should ignore any position-dependent state in
4385 CUMULATIVE_ARGS and only use call-type dependent information. */
4386 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4388 /* Return true if the argument does get a co-processor register; it
4389 should set aapcs_reg to an RTX of the register allocated as is
4390 required for a return from FUNCTION_ARG. */
4391 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4393 /* Return true if a result of mode MODE (or type TYPE if MODE is
BLKmode) can be returned in this co-processor's registers. */
4395 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4397 /* Allocate and return an RTX element to hold the return type of a
call; this routine must not fail and will only be called if
4399 is_return_candidate returned true with the same parameters. */
4400 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
/* Finish processing this argument and prepare to start processing the next one. */
4404 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4405 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4413 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4418 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4419 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
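/* Illustrative note (an observation about this port, not a property
   of the interface): the table is initialized with the single VFP
   entry built by AAPCS_CP (vfp), so the search above can only find
   slot 0 or fail.  */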
4426 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4428 /* We aren't passed a decl, so we can't check that a call is local.
4429 However, it isn't clear that that would be a win anyway, since it
4430 might limit some tail-calling opportunities. */
4431 enum arm_pcs pcs_variant;
4435 const_tree fndecl = NULL_TREE;
4437 if (TREE_CODE (fntype) == FUNCTION_DECL)
4440 fntype = TREE_TYPE (fntype);
4443 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4446 pcs_variant = arm_pcs_default;
4448 if (pcs_variant != ARM_PCS_AAPCS)
4452 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4453 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4462 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4465 /* We aren't passed a decl, so we can't check that a call is local.
4466 However, it isn't clear that that would be a win anyway, since it
4467 might limit some tail-calling opportunities. */
4468 enum arm_pcs pcs_variant;
4469 int unsignedp ATTRIBUTE_UNUSED;
4473 const_tree fndecl = NULL_TREE;
4475 if (TREE_CODE (fntype) == FUNCTION_DECL)
4478 fntype = TREE_TYPE (fntype);
4481 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4484 pcs_variant = arm_pcs_default;
4486 /* Promote integer types. */
4487 if (type && INTEGRAL_TYPE_P (type))
4488 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4490 if (pcs_variant != ARM_PCS_AAPCS)
4494 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4495 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4497 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4501 /* Promotes small structs returned in a register to full-word size
4502 for big-endian AAPCS. */
4503 if (type && arm_return_in_msb (type))
4505 HOST_WIDE_INT size = int_size_in_bytes (type);
4506 if (size % UNITS_PER_WORD != 0)
4508 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4509 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4513 return gen_rtx_REG (mode, R0_REGNUM);
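/* Worked example of the big-endian promotion above (illustrative):
   a 6-byte struct returned in registers has SIZE padded to 8, so
   MODE becomes the 8-byte integer mode and the value comes back in
   r0/r1 with the struct's bytes at the most significant end, as
   arm_return_in_msb expects.  */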
4517 aapcs_libcall_value (enum machine_mode mode)
4519 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4520 && GET_MODE_SIZE (mode) <= 4)
4523 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4526 /* Lay out a function argument using the AAPCS rules. The rule
4527 numbers referred to here are those in the AAPCS. */
4529 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4530 const_tree type, bool named)
4535 /* We only need to do this once per argument. */
4536 if (pcum->aapcs_arg_processed)
4539 pcum->aapcs_arg_processed = true;
4541 /* Special case: if named is false then we are handling an incoming
4542 anonymous argument which is on the stack. */
4546 /* Is this a potential co-processor register candidate? */
4547 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4549 int slot = aapcs_select_call_coproc (pcum, mode, type);
4550 pcum->aapcs_cprc_slot = slot;
4552 /* We don't have to apply any of the rules from part B of the
preparation phase; these are handled elsewhere in the compiler. */
/* A co-processor register candidate goes either in its own
4559 class of registers or on the stack. */
4560 if (!pcum->aapcs_cprc_failed[slot])
/* C1.cp - Try to allocate the argument to co-processor registers. */
4564 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4567 /* C2.cp - Put the argument on the stack and note that we
4568 can't assign any more candidates in this slot. We also
4569 need to note that we have allocated stack space, so that
4570 we won't later try to split a non-cprc candidate between
4571 core registers and the stack. */
4572 pcum->aapcs_cprc_failed[slot] = true;
4573 pcum->can_split = false;
/* We didn't get a register, so this argument goes on the stack. */
4578 gcc_assert (pcum->can_split == false);
4583 /* C3 - For double-word aligned arguments, round the NCRN up to the
4584 next even number. */
4585 ncrn = pcum->aapcs_ncrn;
4586 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4589 nregs = ARM_NUM_REGS2(mode, type);
4591 /* Sigh, this test should really assert that nregs > 0, but a GCC
4592 extension allows empty structs and then gives them empty size; it
4593 then allows such a structure to be passed by value. For some of
4594 the code below we have to pretend that such an argument has
4595 non-zero size so that we 'locate' it correctly either in
4596 registers or on the stack. */
4597 gcc_assert (nregs >= 0);
4599 nregs2 = nregs ? nregs : 1;
4601 /* C4 - Argument fits entirely in core registers. */
4602 if (ncrn + nregs2 <= NUM_ARG_REGS)
4604 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4605 pcum->aapcs_next_ncrn = ncrn + nregs;
4609 /* C5 - Some core registers left and there are no arguments already
4610 on the stack: split this argument between the remaining core
4611 registers and the stack. */
4612 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4614 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4615 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4616 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4620 /* C6 - NCRN is set to 4. */
4621 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
/* C7,C8 - argument goes on the stack. We have nothing to do here. */
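/* Worked example of rules C3-C8 above (illustrative, assuming the
   usual four core argument registers r0-r3):

     void f (int a, long long b, int c);

   A takes r0 (C4).  B needs doubleword alignment, so C3 rounds the
   NCRN from 1 up to 2 and B occupies r2/r3 (C4).  That leaves the
   NCRN at 4, so C cannot use C4 or C5 and is placed on the stack by
   C6/C7/C8.  */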
4627 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4628 for a call to a function whose data type is FNTYPE.
4629 For a library call, FNTYPE is NULL. */
4631 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4633 tree fndecl ATTRIBUTE_UNUSED)
4635 /* Long call handling. */
4637 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4639 pcum->pcs_variant = arm_pcs_default;
4641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4643 if (arm_libcall_uses_aapcs_base (libname))
4644 pcum->pcs_variant = ARM_PCS_AAPCS;
4646 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4647 pcum->aapcs_reg = NULL_RTX;
4648 pcum->aapcs_partial = 0;
4649 pcum->aapcs_arg_processed = false;
4650 pcum->aapcs_cprc_slot = -1;
4651 pcum->can_split = true;
4653 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4657 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4659 pcum->aapcs_cprc_failed[i] = false;
4660 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4668 /* On the ARM, the offset starts at 0. */
4670 pcum->iwmmxt_nregs = 0;
4671 pcum->can_split = true;
4673 /* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named'. */
4675 pcum->named_count = 0;
4678 if (TARGET_REALLY_IWMMXT && fntype)
4682 for (fn_arg = TYPE_ARG_TYPES (fntype);
4684 fn_arg = TREE_CHAIN (fn_arg))
4685 pcum->named_count += 1;
4687 if (! pcum->named_count)
4688 pcum->named_count = INT_MAX;
4693 /* Return true if mode/type need doubleword alignment. */
4695 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4697 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4698 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
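/* For instance (illustrative): 'long long' and 'double' have 64-bit
   alignment, which exceeds the 32-bit PARM_BOUNDARY, so they answer
   true here and end up in even-numbered core register pairs (see the
   C3 rule in aapcs_layout_arg and the ARM_DOUBLEWORD_ALIGN handling
   in arm_function_arg).  */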
4702 /* Determine where to put an argument to a function.
4703 Value is zero to push the argument on the stack,
4704 or a hard register in which to store the argument.
4706 MODE is the argument's machine mode.
4707 TYPE is the data type of the argument (as a tree).
4708 This is null for libcalls where that information may
4710 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4711 the preceding args and about the function being called.
4712 NAMED is nonzero if this argument is a named parameter
4713 (otherwise it is an extra parameter matching an ellipsis).
4715 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4716 other arguments are passed on the stack. If (NAMED == 0) (which happens
4717 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
defined), say it is passed on the stack (function_prologue will
indeed make it pass on the stack if necessary). */
4722 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4723 const_tree type, bool named)
4725 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4728 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4729 a call insn (op3 of a call_value insn). */
4730 if (mode == VOIDmode)
4733 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4735 aapcs_layout_arg (pcum, mode, type, named);
4736 return pcum->aapcs_reg;
4739 /* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named'. */
4741 if (TARGET_IWMMXT_ABI
4742 && arm_vector_mode_supported_p (mode)
4743 && pcum->named_count > pcum->nargs + 1)
4745 if (pcum->iwmmxt_nregs <= 9)
4746 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4749 pcum->can_split = false;
4754 /* Put doubleword aligned quantities in even register pairs. */
4756 && ARM_DOUBLEWORD_ALIGN
4757 && arm_needs_doubleword_align (mode, type))
4760 /* Only allow splitting an arg between regs and memory if all preceding
4761 args were allocated to regs. For args passed by reference we only count
4762 the reference pointer. */
4763 if (pcum->can_split)
4766 nregs = ARM_NUM_REGS2 (mode, type);
4768 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4771 return gen_rtx_REG (mode, pcum->nregs);
4775 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4777 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4778 ? DOUBLEWORD_ALIGNMENT
4783 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4784 tree type, bool named)
4786 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4787 int nregs = pcum->nregs;
4789 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4791 aapcs_layout_arg (pcum, mode, type, named);
4792 return pcum->aapcs_partial;
4795 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4798 if (NUM_ARG_REGS > nregs
4799 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4801 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4806 /* Update the data in PCUM to advance over an argument
4807 of mode MODE and data type TYPE.
4808 (TYPE is null for libcalls where that information may not be available.) */
4811 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4812 const_tree type, bool named)
4814 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4816 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4818 aapcs_layout_arg (pcum, mode, type, named);
4820 if (pcum->aapcs_cprc_slot >= 0)
4822 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4824 pcum->aapcs_cprc_slot = -1;
4827 /* Generic stuff. */
4828 pcum->aapcs_arg_processed = false;
4829 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4830 pcum->aapcs_reg = NULL_RTX;
4831 pcum->aapcs_partial = 0;
4836 if (arm_vector_mode_supported_p (mode)
4837 && pcum->named_count > pcum->nargs
4838 && TARGET_IWMMXT_ABI)
4839 pcum->iwmmxt_nregs += 1;
4841 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4845 /* Variable sized types are passed by reference. This is a GCC
4846 extension to the ARM ABI. */
4849 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4850 enum machine_mode mode ATTRIBUTE_UNUSED,
4851 const_tree type, bool named ATTRIBUTE_UNUSED)
4853 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4856 /* Encode the current state of the #pragma [no_]long_calls. */
4859 OFF, /* No #pragma [no_]long_calls is in effect. */
4860 LONG, /* #pragma long_calls is in effect. */
4861 SHORT /* #pragma no_long_calls is in effect. */
4864 static arm_pragma_enum arm_pragma_long_calls = OFF;
4867 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4869 arm_pragma_long_calls = LONG;
4873 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4875 arm_pragma_long_calls = SHORT;
4879 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4881 arm_pragma_long_calls = OFF;
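/* User-level illustration of the pragmas handled above (not part of
   this file):

     #pragma long_calls
     void far_func (void);     - declared here: gets long_call
     #pragma no_long_calls
     void near_func (void);    - declared here: gets short_call
     #pragma long_calls_off    - revert to the command-line default

   The implicit attributes are attached by
   arm_set_default_type_attributes below.  */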
4884 /* Handle an attribute requiring a FUNCTION_DECL;
4885 arguments as in struct attribute_spec.handler. */
4887 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4888 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4890 if (TREE_CODE (*node) != FUNCTION_DECL)
4892 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4894 *no_add_attrs = true;
4900 /* Handle an "interrupt" or "isr" attribute;
4901 arguments as in struct attribute_spec.handler. */
4903 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4908 if (TREE_CODE (*node) != FUNCTION_DECL)
4910 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4912 *no_add_attrs = true;
4914 /* FIXME: the argument if any is checked for type attributes;
4915 should it be checked for decl ones? */
4919 if (TREE_CODE (*node) == FUNCTION_TYPE
4920 || TREE_CODE (*node) == METHOD_TYPE)
4922 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4924 warning (OPT_Wattributes, "%qE attribute ignored",
4926 *no_add_attrs = true;
4929 else if (TREE_CODE (*node) == POINTER_TYPE
4930 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4931 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4932 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4934 *node = build_variant_type_copy (*node);
4935 TREE_TYPE (*node) = build_type_attribute_variant
4937 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4938 *no_add_attrs = true;
4942 /* Possibly pass this attribute on from the type to a decl. */
4943 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4944 | (int) ATTR_FLAG_FUNCTION_NEXT
4945 | (int) ATTR_FLAG_ARRAY_NEXT))
4947 *no_add_attrs = true;
4948 return tree_cons (name, args, NULL_TREE);
4952 warning (OPT_Wattributes, "%qE attribute ignored",
4961 /* Handle a "pcs" attribute; arguments as in struct
4962 attribute_spec.handler. */
4964 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4965 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4967 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4969 warning (OPT_Wattributes, "%qE attribute ignored", name);
4970 *no_add_attrs = true;
4975 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4976 /* Handle the "notshared" attribute. This attribute is another way of
4977 requesting hidden visibility. ARM's compiler supports
4978 "__declspec(notshared)"; we support the same thing via an
4982 arm_handle_notshared_attribute (tree *node,
4983 tree name ATTRIBUTE_UNUSED,
4984 tree args ATTRIBUTE_UNUSED,
4985 int flags ATTRIBUTE_UNUSED,
4988 tree decl = TYPE_NAME (*node);
4992 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4993 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4994 *no_add_attrs = false;
5000 /* Return 0 if the attributes for two types are incompatible, 1 if they
5001 are compatible, and 2 if they are nearly compatible (which causes a
5002 warning to be generated). */
5004 arm_comp_type_attributes (const_tree type1, const_tree type2)
5008 /* Check for mismatch of non-default calling convention. */
5009 if (TREE_CODE (type1) != FUNCTION_TYPE)
5012 /* Check for mismatched call attributes. */
5013 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5014 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5015 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5016 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5018 /* Only bother to check if an attribute is defined. */
5019 if (l1 | l2 | s1 | s2)
5021 /* If one type has an attribute, the other must have the same attribute. */
5022 if ((l1 != l2) || (s1 != s2))
5025 /* Disallow mixed attributes. */
5026 if ((l1 & s2) || (l2 & s1))
5030 /* Check for mismatched ISR attribute. */
5031 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5033 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5034 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5036 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5043 /* Assigns default attributes to newly defined type. This is used to
5044 set short_call/long_call attributes for function types of
5045 functions defined inside corresponding #pragma scopes. */
5047 arm_set_default_type_attributes (tree type)
5049 /* Add __attribute__ ((long_call)) to all functions, when
5050 inside #pragma long_calls or __attribute__ ((short_call)),
5051 when inside #pragma no_long_calls. */
5052 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5054 tree type_attr_list, attr_name;
5055 type_attr_list = TYPE_ATTRIBUTES (type);
5057 if (arm_pragma_long_calls == LONG)
5058 attr_name = get_identifier ("long_call");
5059 else if (arm_pragma_long_calls == SHORT)
5060 attr_name = get_identifier ("short_call");
5064 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5065 TYPE_ATTRIBUTES (type) = type_attr_list;
5069 /* Return true if DECL is known to be linked into section SECTION. */
5072 arm_function_in_section_p (tree decl, section *section)
5074 /* We can only be certain about functions defined in the same
5075 compilation unit. */
5076 if (!TREE_STATIC (decl))
5079 /* Make sure that SYMBOL always binds to the definition in this
5080 compilation unit. */
5081 if (!targetm.binds_local_p (decl))
5084 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5085 if (!DECL_SECTION_NAME (decl))
5087 /* Make sure that we will not create a unique section for DECL. */
5088 if (flag_function_sections || DECL_ONE_ONLY (decl))
5092 return function_section (decl) == section;
5095 /* Return nonzero if a 32-bit "long_call" should be generated for
a call from the current function to DECL. We generate a long_call if the function:
a. has an __attribute__ ((long_call))
5100 or b. is within the scope of a #pragma long_calls
5101 or c. the -mlong-calls command line switch has been specified
5103 However we do not generate a long call if the function:
5105 d. has an __attribute__ ((short_call))
5106 or e. is inside the scope of a #pragma no_long_calls
5107 or f. is defined in the same section as the current function. */
5110 arm_is_long_call_p (tree decl)
5115 return TARGET_LONG_CALLS;
5117 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5118 if (lookup_attribute ("short_call", attrs))
5121 /* For "f", be conservative, and only cater for cases in which the
5122 whole of the current function is placed in the same section. */
5123 if (!flag_reorder_blocks_and_partition
5124 && TREE_CODE (decl) == FUNCTION_DECL
5125 && arm_function_in_section_p (decl, current_function_section ()))
5128 if (lookup_attribute ("long_call", attrs))
5131 return TARGET_LONG_CALLS;
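/* Illustrative example of rules a. and d. above:

     extern void far_entry (void) __attribute__ ((long_call));
     extern void near_help (void) __attribute__ ((short_call));

   Calls to far_entry load the full 32-bit address into a register
   and branch indirectly, so they reach any destination; calls to
   near_help use a plain BL even under -mlong-calls.  */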
5134 /* Return nonzero if it is ok to make a tail-call to DECL. */
5136 arm_function_ok_for_sibcall (tree decl, tree exp)
5138 unsigned long func_type;
5140 if (cfun->machine->sibcall_blocked)
5143 /* Never tailcall something for which we have no decl, or if we
5144 are generating code for Thumb-1. */
5145 if (decl == NULL || TARGET_THUMB1)
5148 /* The PIC register is live on entry to VxWorks PLT entries, so we
5149 must make the call before restoring the PIC register. */
5150 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5153 /* Cannot tail-call to long calls, since these are out of range of
5154 a branch instruction. */
5155 if (arm_is_long_call_p (decl))
5158 /* If we are interworking and the function is not declared static
5159 then we can't tail-call it unless we know that it exists in this
5160 compilation unit (since it might be a Thumb routine). */
5161 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5164 func_type = arm_current_func_type ();
5165 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5166 if (IS_INTERRUPT (func_type))
5169 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5171 /* Check that the return value locations are the same. For
5172 example that we aren't returning a value from the sibling in
a VFP register but then need to transfer it to a core register. */
5177 a = arm_function_value (TREE_TYPE (exp), decl, false);
5178 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5180 if (!rtx_equal_p (a, b))
5184 /* Never tailcall if function may be called with a misaligned SP. */
5185 if (IS_STACKALIGN (func_type))
5188 /* Everything else is ok. */
5193 /* Addressing mode support functions. */
5195 /* Return nonzero if X is a legitimate immediate operand when compiling
5196 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5198 legitimate_pic_operand_p (rtx x)
5200 if (GET_CODE (x) == SYMBOL_REF
5201 || (GET_CODE (x) == CONST
5202 && GET_CODE (XEXP (x, 0)) == PLUS
5203 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5209 /* Record that the current function needs a PIC register. Initialize
5210 cfun->machine->pic_reg if we have not already done so. */
5213 require_pic_register (void)
5215 /* A lot of the logic here is made obscure by the fact that this
5216 routine gets called as part of the rtx cost estimation process.
5217 We don't want those calls to affect any assumptions about the real
5218 function; and further, we can't call entry_of_function() until we
5219 start the real expansion process. */
5220 if (!crtl->uses_pic_offset_table)
5222 gcc_assert (can_create_pseudo_p ());
5223 if (arm_pic_register != INVALID_REGNUM)
5225 if (!cfun->machine->pic_reg)
5226 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5228 /* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation process. */
5231 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5232 crtl->uses_pic_offset_table = 1;
5238 if (!cfun->machine->pic_reg)
5239 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5241 /* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation process. */
5244 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5246 crtl->uses_pic_offset_table = 1;
5249 arm_load_pic_register (0UL);
5254 for (insn = seq; insn; insn = NEXT_INSN (insn))
5256 INSN_LOCATOR (insn) = prologue_locator;
5258 /* We can be called during expansion of PHI nodes, where
5259 we can't yet emit instructions directly in the final
5260 insn stream. Queue the insns on the entry edge, they will
5261 be committed after everything else is expanded. */
5262 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5269 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5271 if (GET_CODE (orig) == SYMBOL_REF
5272 || GET_CODE (orig) == LABEL_REF)
5278 gcc_assert (can_create_pseudo_p ());
5279 reg = gen_reg_rtx (Pmode);
5282 /* VxWorks does not impose a fixed gap between segments; the run-time
5283 gap can be different from the object-file gap. We therefore can't
5284 use GOTOFF unless we are absolutely sure that the symbol is in the
5285 same segment as the GOT. Unfortunately, the flexibility of linker
5286 scripts means that we can't be sure of that in general, so assume
5287 that GOTOFF is never valid on VxWorks. */
5288 if ((GET_CODE (orig) == LABEL_REF
5289 || (GET_CODE (orig) == SYMBOL_REF &&
5290 SYMBOL_REF_LOCAL_P (orig)))
5292 && !TARGET_VXWORKS_RTP)
5293 insn = arm_pic_static_addr (orig, reg);
5299 /* If this function doesn't have a pic register, create one now. */
5300 require_pic_register ();
5302 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5304 /* Make the MEM as close to a constant as possible. */
5305 mem = SET_SRC (pat);
5306 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5307 MEM_READONLY_P (mem) = 1;
5308 MEM_NOTRAP_P (mem) = 1;
5310 insn = emit_insn (pat);
/* Put a REG_EQUAL note on this insn, so that it can be optimized by the loop optimizer. */
5315 set_unique_reg_note (insn, REG_EQUAL, orig);
5319 else if (GET_CODE (orig) == CONST)
5323 if (GET_CODE (XEXP (orig, 0)) == PLUS
5324 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5327 /* Handle the case where we have: const (UNSPEC_TLS). */
5328 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5329 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5332 /* Handle the case where we have:
const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5335 if (GET_CODE (XEXP (orig, 0)) == PLUS
5336 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5337 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5339 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5345 gcc_assert (can_create_pseudo_p ());
5346 reg = gen_reg_rtx (Pmode);
5349 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5351 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5352 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5353 base == reg ? 0 : reg);
5355 if (GET_CODE (offset) == CONST_INT)
/* The base register doesn't really matter; we only want to
5358 test the index for the appropriate mode. */
5359 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5361 gcc_assert (can_create_pseudo_p ());
5362 offset = force_reg (Pmode, offset);
5365 if (GET_CODE (offset) == CONST_INT)
5366 return plus_constant (base, INTVAL (offset));
5369 if (GET_MODE_SIZE (mode) > 4
5370 && (GET_MODE_CLASS (mode) == MODE_INT
5371 || TARGET_SOFT_FLOAT))
5373 emit_insn (gen_addsi3 (reg, base, offset));
5377 return gen_rtx_PLUS (Pmode, base, offset);
/* Find a spare register to use during the prologue of a function. */
5387 thumb_find_work_register (unsigned long pushed_regs_mask)
5391 /* Check the argument registers first as these are call-used. The
5392 register allocation order means that sometimes r3 might be used
5393 but earlier argument registers might not, so check them all. */
5394 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5395 if (!df_regs_ever_live_p (reg))
5398 /* Before going on to check the call-saved registers we can try a couple
5399 more ways of deducing that r3 is available. The first is when we are
5400 pushing anonymous arguments onto the stack and we have less than 4
5401 registers worth of fixed arguments(*). In this case r3 will be part of
5402 the variable argument list and so we can be sure that it will be
5403 pushed right at the start of the function. Hence it will be available
5404 for the rest of the prologue.
(*): i.e. crtl->args.pretend_args_size is greater than 0. */
5406 if (cfun->machine->uses_anonymous_args
5407 && crtl->args.pretend_args_size > 0)
5408 return LAST_ARG_REGNUM;
5410 /* The other case is when we have fixed arguments but less than 4 registers
5411 worth. In this case r3 might be used in the body of the function, but
5412 it is not being used to convey an argument into the function. In theory
5413 we could just check crtl->args.size to see how many bytes are
5414 being passed in argument registers, but it seems that it is unreliable.
5415 Sometimes it will have the value 0 when in fact arguments are being
5416 passed. (See testcase execute/20021111-1.c for an example). So we also
5417 check the args_info.nregs field as well. The problem with this field is
5418 that it makes no allowances for arguments that are passed to the
5419 function but which are not used. Hence we could miss an opportunity
5420 when a function has an unused argument in r3. But it is better to be
5421 safe than to be sorry. */
5422 if (! cfun->machine->uses_anonymous_args
5423 && crtl->args.size >= 0
5424 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5425 && crtl->args.info.nregs < 4)
5426 return LAST_ARG_REGNUM;
5428 /* Otherwise look for a call-saved register that is going to be pushed. */
5429 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5430 if (pushed_regs_mask & (1 << reg))
5435 /* Thumb-2 can use high regs. */
5436 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5437 if (pushed_regs_mask & (1 << reg))
5440 /* Something went wrong - thumb_compute_save_reg_mask()
5441 should have arranged for a suitable register to be pushed. */
5445 static GTY(()) int pic_labelno;
/* Generate code to load the PIC register. In Thumb mode SCRATCH is a low register. */
5451 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5453 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5455 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5458 gcc_assert (flag_pic);
5460 pic_reg = cfun->machine->pic_reg;
5461 if (TARGET_VXWORKS_RTP)
5463 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5464 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5465 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5467 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5469 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5470 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5474 /* We use an UNSPEC rather than a LABEL_REF because this label
5475 never appears in the code stream. */
5477 labelno = GEN_INT (pic_labelno++);
5478 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5479 l1 = gen_rtx_CONST (VOIDmode, l1);
5481 /* On the ARM the PC register contains 'dot + 8' at the time of the
5482 addition, on the Thumb it is 'dot + 4'. */
5483 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5484 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5486 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5490 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5492 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5494 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5496 else /* TARGET_THUMB1 */
5498 if (arm_pic_register != INVALID_REGNUM
5499 && REGNO (pic_reg) > LAST_LO_REGNUM)
5501 /* We will have pushed the pic register, so we should always be
5502 able to find a work register. */
5503 pic_tmp = gen_rtx_REG (SImode,
5504 thumb_find_work_register (saved_regs));
5505 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5506 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5509 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5510 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5514 /* Need to emit this whether or not we obey regdecls,
5515 since setjmp/longjmp can cause life info to screw up. */
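/* For illustration, the pic_load_addr/pic_add_dot_plus_* patterns
   emitted above expand to a sequence like this in ARM state (a
   sketch; register and label names are placeholders):

       ldr   r4, .LPIC_off      @ GOT base - (.LPIC0 + 8)
     .LPIC0:
       add   r4, pc, r4         @ pc reads as .LPIC0 + 8 here

   which is why 'dot + 8' is added for ARM and 'dot + 4' for Thumb
   in the constant built above.  */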
5519 /* Generate code to load the address of a static var when flag_pic is set. */
5521 arm_pic_static_addr (rtx orig, rtx reg)
5523 rtx l1, labelno, offset_rtx, insn;
5525 gcc_assert (flag_pic);
5527 /* We use an UNSPEC rather than a LABEL_REF because this label
5528 never appears in the code stream. */
5529 labelno = GEN_INT (pic_labelno++);
5530 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5531 l1 = gen_rtx_CONST (VOIDmode, l1);
5533 /* On the ARM the PC register contains 'dot + 8' at the time of the
5534 addition, on the Thumb it is 'dot + 4'. */
5535 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5536 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5537 UNSPEC_SYMBOL_OFFSET);
5538 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5542 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5544 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5546 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5548 else /* TARGET_THUMB1 */
5550 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5551 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5557 /* Return nonzero if X is valid as an ARM state addressing register. */
5559 arm_address_register_rtx_p (rtx x, int strict_p)
5563 if (GET_CODE (x) != REG)
5569 return ARM_REGNO_OK_FOR_BASE_P (regno);
5571 return (regno <= LAST_ARM_REGNUM
5572 || regno >= FIRST_PSEUDO_REGISTER
5573 || regno == FRAME_POINTER_REGNUM
5574 || regno == ARG_POINTER_REGNUM);
5577 /* Return TRUE if this rtx is the difference of a symbol and a label,
5578 and will reduce to a PC-relative relocation in the object file.
5579 Expressions like this can be left alone when generating PIC, rather
5580 than forced through the GOT. */
5582 pcrel_constant_p (rtx x)
5584 if (GET_CODE (x) == MINUS)
5585 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
/* Return true if X will surely end up in an index register after next splitting pass. */
5593 will_be_in_index_register (const_rtx x)
5595 /* arm.md: calculate_pic_address will split this into a register. */
5596 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5599 /* Return nonzero if X is a valid ARM state address operand. */
5601 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5605 enum rtx_code code = GET_CODE (x);
5607 if (arm_address_register_rtx_p (x, strict_p))
5610 use_ldrd = (TARGET_LDRD
5612 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5614 if (code == POST_INC || code == PRE_DEC
5615 || ((code == PRE_INC || code == POST_DEC)
5616 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5617 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5619 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5620 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5621 && GET_CODE (XEXP (x, 1)) == PLUS
5622 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5624 rtx addend = XEXP (XEXP (x, 1), 1);
5626 /* Don't allow ldrd post increment by register because it's hard
to fix up invalid register choices. */
5629 && GET_CODE (x) == POST_MODIFY
5630 && GET_CODE (addend) == REG)
5633 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5634 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5637 /* After reload constants split into minipools will have addresses
5638 from a LABEL_REF. */
5639 else if (reload_completed
5640 && (code == LABEL_REF
5642 && GET_CODE (XEXP (x, 0)) == PLUS
5643 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5644 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5647 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5650 else if (code == PLUS)
5652 rtx xop0 = XEXP (x, 0);
5653 rtx xop1 = XEXP (x, 1);
5655 return ((arm_address_register_rtx_p (xop0, strict_p)
5656 && ((GET_CODE(xop1) == CONST_INT
5657 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5658 || (!strict_p && will_be_in_index_register (xop1))))
5659 || (arm_address_register_rtx_p (xop1, strict_p)
5660 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5664 /* Reload currently can't handle MINUS, so disable this for now */
5665 else if (GET_CODE (x) == MINUS)
5667 rtx xop0 = XEXP (x, 0);
5668 rtx xop1 = XEXP (x, 1);
5670 return (arm_address_register_rtx_p (xop0, strict_p)
5671 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5675 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5676 && code == SYMBOL_REF
5677 && CONSTANT_POOL_ADDRESS_P (x)
5679 && symbol_mentioned_p (get_pool_constant (x))
5680 && ! pcrel_constant_p (get_pool_constant (x))))
5686 /* Return nonzero if X is a valid Thumb-2 address operand. */
5688 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5691 enum rtx_code code = GET_CODE (x);
5693 if (arm_address_register_rtx_p (x, strict_p))
5696 use_ldrd = (TARGET_LDRD
5698 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5700 if (code == POST_INC || code == PRE_DEC
5701 || ((code == PRE_INC || code == POST_DEC)
5702 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5703 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5705 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5706 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5707 && GET_CODE (XEXP (x, 1)) == PLUS
5708 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5710 /* Thumb-2 only has autoincrement by constant. */
5711 rtx addend = XEXP (XEXP (x, 1), 1);
5712 HOST_WIDE_INT offset;
5714 if (GET_CODE (addend) != CONST_INT)
offset = INTVAL (addend);
5718 if (GET_MODE_SIZE (mode) <= 4)
5719 return (offset > -256 && offset < 256);
5721 return (use_ldrd && offset > -1024 && offset < 1024
5722 && (offset & 3) == 0);
5725 /* After reload constants split into minipools will have addresses
5726 from a LABEL_REF. */
5727 else if (reload_completed
5728 && (code == LABEL_REF
5730 && GET_CODE (XEXP (x, 0)) == PLUS
5731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5732 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5735 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5738 else if (code == PLUS)
5740 rtx xop0 = XEXP (x, 0);
5741 rtx xop1 = XEXP (x, 1);
5743 return ((arm_address_register_rtx_p (xop0, strict_p)
5744 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5745 || (!strict_p && will_be_in_index_register (xop1))))
5746 || (arm_address_register_rtx_p (xop1, strict_p)
5747 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5750 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5751 && code == SYMBOL_REF
5752 && CONSTANT_POOL_ADDRESS_P (x)
5754 && symbol_mentioned_p (get_pool_constant (x))
5755 && ! pcrel_constant_p (get_pool_constant (x))))
/* Return nonzero if INDEX is valid for an address index operand in ARM state. */
5764 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5767 HOST_WIDE_INT range;
5768 enum rtx_code code = GET_CODE (index);
5770 /* Standard coprocessor addressing modes. */
5771 if (TARGET_HARD_FLOAT
5772 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5773 && (mode == SFmode || mode == DFmode
5774 || (TARGET_MAVERICK && mode == DImode)))
5775 return (code == CONST_INT && INTVAL (index) < 1024
5776 && INTVAL (index) > -1024
5777 && (INTVAL (index) & 3) == 0);
5779 /* For quad modes, we restrict the constant offset to be slightly less
5780 than what the instruction format permits. We do this because for
5781 quad mode moves, we will actually decompose them into two separate
5782 double-mode reads or writes. INDEX must therefore be a valid
5783 (double-mode) offset and so should INDEX+8. */
5784 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5785 return (code == CONST_INT
5786 && INTVAL (index) < 1016
5787 && INTVAL (index) > -1024
5788 && (INTVAL (index) & 3) == 0);
5790 /* We have no such constraint on double mode offsets, so we permit the
5791 full range of the instruction format. */
5792 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5793 return (code == CONST_INT
5794 && INTVAL (index) < 1024
5795 && INTVAL (index) > -1024
5796 && (INTVAL (index) & 3) == 0);
5798 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5799 return (code == CONST_INT
5800 && INTVAL (index) < 1024
5801 && INTVAL (index) > -1024
5802 && (INTVAL (index) & 3) == 0);
5804 if (arm_address_register_rtx_p (index, strict_p)
5805 && (GET_MODE_SIZE (mode) <= 4))
5808 if (mode == DImode || mode == DFmode)
5810 if (code == CONST_INT)
5812 HOST_WIDE_INT val = INTVAL (index);
5815 return val > -256 && val < 256;
5817 return val > -4096 && val < 4092;
5820 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5823 if (GET_MODE_SIZE (mode) <= 4
5827 || (mode == QImode && outer == SIGN_EXTEND))))
5831 rtx xiop0 = XEXP (index, 0);
5832 rtx xiop1 = XEXP (index, 1);
5834 return ((arm_address_register_rtx_p (xiop0, strict_p)
5835 && power_of_two_operand (xiop1, SImode))
5836 || (arm_address_register_rtx_p (xiop1, strict_p)
5837 && power_of_two_operand (xiop0, SImode)));
5839 else if (code == LSHIFTRT || code == ASHIFTRT
5840 || code == ASHIFT || code == ROTATERT)
5842 rtx op = XEXP (index, 1);
5844 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5845 && GET_CODE (op) == CONST_INT
5847 && INTVAL (op) <= 31);
/* For ARM v4 we may be doing a sign-extend operation during the load. */
5857 || (outer == SIGN_EXTEND && mode == QImode))
5863 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5865 return (code == CONST_INT
5866 && INTVAL (index) < range
5867 && INTVAL (index) > -range);
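/* Summary of the fall-through case above (illustrative): with RANGE
   == 4096, word and unsigned-byte accesses accept offsets in
   -4095..+4095, matching the 12-bit offset field of ARM-state
   LDR/STR; halfword and sign-extending loads are restricted much
   further above because LDRH/LDRSB only encode a +/-255 offset.  */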
5870 /* Return true if OP is a valid index scaling factor for Thumb-2 address
index operand, i.e. 1, 2, 4 or 8. */
5873 thumb2_index_mul_operand (rtx op)
5877 if (GET_CODE(op) != CONST_INT)
5881 return (val == 1 || val == 2 || val == 4 || val == 8);
5884 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5886 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5888 enum rtx_code code = GET_CODE (index);
5890 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5891 /* Standard coprocessor addressing modes. */
5892 if (TARGET_HARD_FLOAT
5893 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5894 && (mode == SFmode || mode == DFmode
5895 || (TARGET_MAVERICK && mode == DImode)))
5896 return (code == CONST_INT && INTVAL (index) < 1024
/* Thumb-2 allows only > -256 index range for its core register
5898 load/stores. Since we allow SF/DF in core registers, we have
5899 to use the intersection between -256~4096 (core) and -1024~1024
5901 && INTVAL (index) > -256
5902 && (INTVAL (index) & 3) == 0);
5904 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5906 /* For DImode assume values will usually live in core regs
5907 and only allow LDRD addressing modes. */
5908 if (!TARGET_LDRD || mode != DImode)
5909 return (code == CONST_INT
5910 && INTVAL (index) < 1024
5911 && INTVAL (index) > -1024
5912 && (INTVAL (index) & 3) == 0);
5915 /* For quad modes, we restrict the constant offset to be slightly less
5916 than what the instruction format permits. We do this because for
5917 quad mode moves, we will actually decompose them into two separate
5918 double-mode reads or writes. INDEX must therefore be a valid
5919 (double-mode) offset and so should INDEX+8. */
5920 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5921 return (code == CONST_INT
5922 && INTVAL (index) < 1016
5923 && INTVAL (index) > -1024
5924 && (INTVAL (index) & 3) == 0);
5926 /* We have no such constraint on double mode offsets, so we permit the
5927 full range of the instruction format. */
5928 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5929 return (code == CONST_INT
5930 && INTVAL (index) < 1024
5931 && INTVAL (index) > -1024
5932 && (INTVAL (index) & 3) == 0);
5934 if (arm_address_register_rtx_p (index, strict_p)
5935 && (GET_MODE_SIZE (mode) <= 4))
5938 if (mode == DImode || mode == DFmode)
5940 if (code == CONST_INT)
5942 HOST_WIDE_INT val = INTVAL (index);
5943 /* ??? Can we assume ldrd for thumb2? */
5944 /* Thumb-2 ldrd only has reg+const addressing modes. */
5945 /* ldrd supports offsets of +-1020.
5946 However the ldr fallback does not. */
5947 return val > -256 && val < 256 && (val & 3) == 0;
5955 rtx xiop0 = XEXP (index, 0);
5956 rtx xiop1 = XEXP (index, 1);
5958 return ((arm_address_register_rtx_p (xiop0, strict_p)
5959 && thumb2_index_mul_operand (xiop1))
5960 || (arm_address_register_rtx_p (xiop1, strict_p)
5961 && thumb2_index_mul_operand (xiop0)));
5963 else if (code == ASHIFT)
5965 rtx op = XEXP (index, 1);
5967 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5968 && GET_CODE (op) == CONST_INT
5970 && INTVAL (op) <= 3);
5973 return (code == CONST_INT
5974 && INTVAL (index) < 4096
5975 && INTVAL (index) > -256);
5978 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5980 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5984 if (GET_CODE (x) != REG)
5990 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5992 return (regno <= LAST_LO_REGNUM
5993 || regno > LAST_VIRTUAL_REGISTER
5994 || regno == FRAME_POINTER_REGNUM
5995 || (GET_MODE_SIZE (mode) >= 4
5996 && (regno == STACK_POINTER_REGNUM
5997 || regno >= FIRST_PSEUDO_REGISTER
5998 || x == hard_frame_pointer_rtx
5999 || x == arg_pointer_rtx)));
6002 /* Return nonzero if x is a legitimate index register. This is the case
6003 for any base register that can access a QImode object. */
6005 thumb1_index_register_rtx_p (rtx x, int strict_p)
6007 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6010 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6012 The AP may be eliminated to either the SP or the FP, so we use the
6013 least common denominator, e.g. SImode, and offsets from 0 to 64.
6015 ??? Verify whether the above is the right approach.
6017 ??? Also, the FP may be eliminated to the SP, so perhaps that
6018 needs special handling also.
6020 ??? Look at how the mips16 port solves this problem. It probably uses
6021 better ways to solve some of these problems.
6023 Although it is not incorrect, we don't accept QImode and HImode
6024 addresses based on the frame pointer or arg pointer until the
6025 reload pass starts. This is so that eliminating such addresses
6026 into stack based ones won't produce impossible code. */
6028 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6030 /* ??? Not clear if this is right. Experiment. */
6031 if (GET_MODE_SIZE (mode) < 4
6032 && !(reload_in_progress || reload_completed)
6033 && (reg_mentioned_p (frame_pointer_rtx, x)
6034 || reg_mentioned_p (arg_pointer_rtx, x)
6035 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6036 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6037 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6038 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6041 /* Accept any base register. SP only in SImode or larger. */
6042 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6045 /* This is PC relative data before arm_reorg runs. */
6046 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6047 && GET_CODE (x) == SYMBOL_REF
6048 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6051 /* This is PC relative data after arm_reorg runs. */
6052 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6054 && (GET_CODE (x) == LABEL_REF
6055 || (GET_CODE (x) == CONST
6056 && GET_CODE (XEXP (x, 0)) == PLUS
6057 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6058 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6061 /* Post-inc indexing only supported for SImode and larger. */
6062 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6063 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6066 else if (GET_CODE (x) == PLUS)
6068 /* REG+REG address can be any two index registers. */
6069 /* We disallow FRAME+REG addressing since we know that FRAME
6070 will be replaced with STACK, and SP relative addressing only
6071 permits SP+OFFSET. */
6072 if (GET_MODE_SIZE (mode) <= 4
6073 && XEXP (x, 0) != frame_pointer_rtx
6074 && XEXP (x, 1) != frame_pointer_rtx
6075 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6076 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6077 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6080 /* REG+const has 5-7 bit offset for non-SP registers. */
6081 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6082 || XEXP (x, 0) == arg_pointer_rtx)
6083 && GET_CODE (XEXP (x, 1)) == CONST_INT
6084 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6087 /* REG+const has 10-bit offset for SP, but only SImode and
6088 larger is supported. */
6089 /* ??? Should probably check for DI/DFmode overflow here
6090 just like GO_IF_LEGITIMATE_OFFSET does. */
6091 else if (GET_CODE (XEXP (x, 0)) == REG
6092 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6093 && GET_MODE_SIZE (mode) >= 4
6094 && GET_CODE (XEXP (x, 1)) == CONST_INT
6095 && INTVAL (XEXP (x, 1)) >= 0
6096 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6097 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6100 else if (GET_CODE (XEXP (x, 0)) == REG
6101 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6102 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6103 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6104 && REGNO (XEXP (x, 0))
6105 <= LAST_VIRTUAL_POINTER_REGISTER))
6106 && GET_MODE_SIZE (mode) >= 4
6107 && GET_CODE (XEXP (x, 1)) == CONST_INT
6108 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6112 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6113 && GET_MODE_SIZE (mode) == 4
6114 && GET_CODE (x) == SYMBOL_REF
6115 && CONSTANT_POOL_ADDRESS_P (x)
6117 && symbol_mentioned_p (get_pool_constant (x))
6118 && ! pcrel_constant_p (get_pool_constant (x))))
6124 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6125 instruction of mode MODE. */
6127 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6129 switch (GET_MODE_SIZE (mode))
6132 return val >= 0 && val < 32;
6135 return val >= 0 && val < 64 && (val & 1) == 0;
6139 && (val + GET_MODE_SIZE (mode)) <= 128
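/* The checks above mirror the 5-bit scaled offset fields of the
   Thumb-1 load/store encodings; for illustration, the largest
   accepted immediate forms are roughly:

       ldrb  r0, [r1, #31]      @ bytes:     0..31
       ldrh  r0, [r1, #62]      @ halfwords: 0..62, even
       ldr   r0, [r1, #124]     @ words:     0..124, multiple of 4  */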
6145 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6148 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6149 else if (TARGET_THUMB2)
6150 return thumb2_legitimate_address_p (mode, x, strict_p);
6151 else /* if (TARGET_THUMB1) */
6152 return thumb1_legitimate_address_p (mode, x, strict_p);
6155 /* Build the SYMBOL_REF for __tls_get_addr. */
6157 static GTY(()) rtx tls_get_addr_libfunc;
6160 get_tls_get_addr (void)
6162 if (!tls_get_addr_libfunc)
6163 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6164 return tls_get_addr_libfunc;
6168 arm_load_tp (rtx target)
6171 target = gen_reg_rtx (SImode);
6175 /* Can return in any reg. */
6176 emit_insn (gen_load_tp_hard (target));
6180 /* Always returned in r0. Immediately copy the result into a pseudo,
6181 otherwise other uses of r0 (e.g. setting up function arguments) may
6182 clobber the value. */
6186 emit_insn (gen_load_tp_soft ());
6188 tmp = gen_rtx_REG (SImode, 0);
6189 emit_move_insn (target, tmp);
6195 load_tls_operand (rtx x, rtx reg)
6199 if (reg == NULL_RTX)
6200 reg = gen_reg_rtx (SImode);
6202 tmp = gen_rtx_CONST (SImode, x);
6204 emit_move_insn (reg, tmp);
6210 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6212 rtx insns, label, labelno, sum;
6214 gcc_assert (reloc != TLS_DESCSEQ);
6217 labelno = GEN_INT (pic_labelno++);
6218 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6219 label = gen_rtx_CONST (VOIDmode, label);
6221 sum = gen_rtx_UNSPEC (Pmode,
6222 gen_rtvec (4, x, GEN_INT (reloc), label,
6223 GEN_INT (TARGET_ARM ? 8 : 4)),
6225 reg = load_tls_operand (sum, reg);
6228 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6230 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6232 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6233 LCT_PURE, /* LCT_CONST? */
6234 Pmode, 1, reg, Pmode);
6236 insns = get_insns ();
6243 arm_tls_descseq_addr (rtx x, rtx reg)
6245 rtx labelno = GEN_INT (pic_labelno++);
6246 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6247 rtx sum = gen_rtx_UNSPEC (Pmode,
6248 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6249 gen_rtx_CONST (VOIDmode, label),
6250 GEN_INT (!TARGET_ARM)),
6252 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6254 emit_insn (gen_tlscall (x, labelno));
6256 reg = gen_reg_rtx (SImode);
6258 gcc_assert (REGNO (reg) != 0);
6260 emit_move_insn (reg, reg0);
6266 legitimize_tls_address (rtx x, rtx reg)
6268 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6269 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6273 case TLS_MODEL_GLOBAL_DYNAMIC:
6274 if (TARGET_GNU2_TLS)
6276 reg = arm_tls_descseq_addr (x, reg);
6278 tp = arm_load_tp (NULL_RTX);
6280 dest = gen_rtx_PLUS (Pmode, tp, reg);
6284 /* Original scheme */
6285 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6286 dest = gen_reg_rtx (Pmode);
6287 emit_libcall_block (insns, dest, ret, x);
6291 case TLS_MODEL_LOCAL_DYNAMIC:
6292 if (TARGET_GNU2_TLS)
6294 reg = arm_tls_descseq_addr (x, reg);
6296 tp = arm_load_tp (NULL_RTX);
6298 dest = gen_rtx_PLUS (Pmode, tp, reg);
6302 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6304 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6305 share the LDM result with other LD model accesses. */
6306 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6308 dest = gen_reg_rtx (Pmode);
6309 emit_libcall_block (insns, dest, ret, eqv);
6311 /* Load the addend. */
6312 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6313 GEN_INT (TLS_LDO32)),
6315 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6316 dest = gen_rtx_PLUS (Pmode, dest, addend);
6320 case TLS_MODEL_INITIAL_EXEC:
6321 labelno = GEN_INT (pic_labelno++);
6322 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6323 label = gen_rtx_CONST (VOIDmode, label);
6324 sum = gen_rtx_UNSPEC (Pmode,
6325 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6326 GEN_INT (TARGET_ARM ? 8 : 4)),
6328 reg = load_tls_operand (sum, reg);
6331 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6332 else if (TARGET_THUMB2)
6333 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6336 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6337 emit_move_insn (reg, gen_const_mem (SImode, reg));
6340 tp = arm_load_tp (NULL_RTX);
6342 return gen_rtx_PLUS (Pmode, tp, reg);
6344 case TLS_MODEL_LOCAL_EXEC:
6345 tp = arm_load_tp (NULL_RTX);
6347 reg = gen_rtx_UNSPEC (Pmode,
6348 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6350 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6352 return gen_rtx_PLUS (Pmode, tp, reg);
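/* In outline (an illustrative summary, not from the original sources),
the sequences built above are:
GD/LD: materialize a pc-relative UNSPEC_TLS address, then call
__tls_get_addr (or, for TARGET_GNU2_TLS, run the tlscall
descriptor sequence and add the thread pointer);
IE:    load the thread-pointer offset from a pc-relative GOT slot and
add it to the thread pointer;
LE:    add the link-time TLS_LE32 offset directly to the thread
pointer. */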
6359 /* Try machine-dependent ways of modifying an illegitimate address
6360 to be legitimate. If we find one, return the new, valid address. */
6362 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6366 /* TODO: legitimize_address for Thumb2. */
6369 return thumb_legitimize_address (x, orig_x, mode);
6372 if (arm_tls_symbol_p (x))
6373 return legitimize_tls_address (x, NULL_RTX);
6375 if (GET_CODE (x) == PLUS)
6377 rtx xop0 = XEXP (x, 0);
6378 rtx xop1 = XEXP (x, 1);
6380 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6381 xop0 = force_reg (SImode, xop0);
6383 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6384 xop1 = force_reg (SImode, xop1);
6386 if (ARM_BASE_REGISTER_RTX_P (xop0)
6387 && GET_CODE (xop1) == CONST_INT)
6389 HOST_WIDE_INT n, low_n;
6393 /* VFP addressing modes actually allow greater offsets, but for
6394 now we just stick with the lowest common denominator. */
6396 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6408 low_n = ((mode) == TImode ? 0
6409 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6413 base_reg = gen_reg_rtx (SImode);
6414 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6415 emit_move_insn (base_reg, val);
6416 x = plus_constant (base_reg, low_n);
6418 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6419 x = gen_rtx_PLUS (SImode, xop0, xop1);
6422 /* XXX We don't allow MINUS any more -- see comment in
6423 arm_legitimate_address_outer_p (). */
6424 else if (GET_CODE (x) == MINUS)
6426 rtx xop0 = XEXP (x, 0);
6427 rtx xop1 = XEXP (x, 1);
6429 if (CONSTANT_P (xop0))
6430 xop0 = force_reg (SImode, xop0);
6432 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6433 xop1 = force_reg (SImode, xop1);
6435 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6436 x = gen_rtx_MINUS (SImode, xop0, xop1);
6439 /* Make sure to take full advantage of the pre-indexed addressing mode
6440 with absolute addresses which often allows for the base register to
6441 be factorized for multiple adjacent memory references, and it might
6442 even allow for the mini pool to be avoided entirely. */
6443 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6446 HOST_WIDE_INT mask, base, index;
6449 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6450 use an 8-bit index. So let's use a 12-bit index for SImode only and
6451 hope that arm_gen_constant will enable ldrb to use more bits. */
6452 bits = (mode == SImode) ? 12 : 8;
6453 mask = (1 << bits) - 1;
6454 base = INTVAL (x) & ~mask;
6455 index = INTVAL (x) & mask;
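/* Hypothetical worked example (illustration only, not from the original
sources): for SImode, x == 0x1234567 gives bits == 12, mask == 0xfff,
base == 0x1234000 and index == 0x567; the base has few bits set, so the
test below keeps the positive index, and the access becomes
ldr rD, [rBASE, #0x567] once the base is materialized. */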
6456 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6458 /* It'll most probably be more efficient to generate the base
6459 with more bits set and use a negative index instead. */
6463 base_reg = force_reg (SImode, GEN_INT (base));
6464 x = plus_constant (base_reg, index);
6469 /* We need to find and carefully transform any SYMBOL and LABEL
6470 references, so go back to the original address expression. */
6471 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6473 if (new_x != orig_x)
6481 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6482 to be legitimate. If we find one, return the new, valid address. */
6484 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6486 if (arm_tls_symbol_p (x))
6487 return legitimize_tls_address (x, NULL_RTX);
6489 if (GET_CODE (x) == PLUS
6490 && GET_CODE (XEXP (x, 1)) == CONST_INT
6491 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6492 || INTVAL (XEXP (x, 1)) < 0))
6494 rtx xop0 = XEXP (x, 0);
6495 rtx xop1 = XEXP (x, 1);
6496 HOST_WIDE_INT offset = INTVAL (xop1);
6498 /* Try and fold the offset into a biasing of the base register and
6499 then offsetting that. Don't do this when optimizing for space
6500 since it can cause too many CSEs. */
6501 if (optimize_size && offset >= 0
6502 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6504 HOST_WIDE_INT delta;
6507 delta = offset - (256 - GET_MODE_SIZE (mode));
6508 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6509 delta = 31 * GET_MODE_SIZE (mode);
6511 delta = offset & (~31 * GET_MODE_SIZE (mode));
6513 xop0 = force_operand (plus_constant (xop0, offset - delta),
6515 x = plus_constant (xop0, delta);
6517 else if (offset < 0 && offset > -256)
6518 /* Small negative offsets are best done with a subtract before the
6519 dereference, forcing these into a register normally takes two insns. */
6521 x = force_operand (x, NULL_RTX);
6524 /* For the remaining cases, force the constant into a register. */
6525 xop1 = force_reg (SImode, xop1);
6526 x = gen_rtx_PLUS (SImode, xop0, xop1);
6529 else if (GET_CODE (x) == PLUS
6530 && s_register_operand (XEXP (x, 1), SImode)
6531 && !s_register_operand (XEXP (x, 0), SImode))
6533 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6535 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6540 /* We need to find and carefully transform any SYMBOL and LABEL
6541 references, so go back to the original address expression. */
6542 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6544 if (new_x != orig_x)
6552 arm_legitimize_reload_address (rtx *p,
6553 enum machine_mode mode,
6554 int opnum, int type,
6555 int ind_levels ATTRIBUTE_UNUSED)
6557 /* We must recognize output that we have already generated ourselves. */
6558 if (GET_CODE (*p) == PLUS
6559 && GET_CODE (XEXP (*p, 0)) == PLUS
6560 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6561 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6562 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6564 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6565 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6566 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6570 if (GET_CODE (*p) == PLUS
6571 && GET_CODE (XEXP (*p, 0)) == REG
6572 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6573 /* If the base register is equivalent to a constant, let the generic
6574 code handle it. Otherwise we will run into problems if a future
6575 reload pass decides to rematerialize the constant. */
6576 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6577 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6579 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6580 HOST_WIDE_INT low, high;
6582 /* Detect coprocessor load/stores. */
6583 bool coproc_p = ((TARGET_HARD_FLOAT
6584 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6585 && (mode == SFmode || mode == DFmode
6586 || (mode == DImode && TARGET_MAVERICK)))
6587 || (TARGET_REALLY_IWMMXT
6588 && VALID_IWMMXT_REG_MODE (mode))
6590 && (VALID_NEON_DREG_MODE (mode)
6591 || VALID_NEON_QREG_MODE (mode))));
6593 /* For some conditions, bail out when lower two bits are unaligned. */
6594 if ((val & 0x3) != 0
6595 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6597 /* For DI, and DF under soft-float: */
6598 || ((mode == DImode || mode == DFmode)
6599 /* Without ldrd, we use stm/ldm, which does not
6600 fare well with unaligned bits. */
6602 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6603 || TARGET_THUMB2))))
6606 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6607 where the (reg+high) gets turned into a reload add insn,
6608 we try to decompose the index into high/low values that can often
6609 also lead to better reload CSE.
6611 ldr r0, [r2, #4100] // Offset too large
6612 ldr r1, [r2, #4104] // Offset too large
6614 is best reloaded as:
6620 which post-reload CSE can simplify in most cases to eliminate the
6621 second add instruction:
6626 The idea here is that we want to split out the bits of the constant
6627 as a mask, rather than by subtracting the maximum offset that the
6628 respective type of load/store used can handle.
6630 When encountering negative offsets, we can still utilize them even if
6631 the overall offset is positive; sometimes this may lead to an immediate
6632 that can be constructed with fewer instructions.
6634 ldr r0, [r2, #0x3FFFFC]
6636 This is best reloaded as:
6637 add t1, r2, #0x400000
6640 The trick for spotting this for a load insn with N bits of offset
6641 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6642 negative offset that is going to make bit N and all the bits below
6643 it become zero in the remainder part.
6645 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6646 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6647 used in most cases of ARM load/store instructions. */
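/* A worked instance of the macro below (illustration only): for the
0x3FFFFC case above with N = 12, the low 12 bits (0xFFC) are nonzero and
bit 12 is set, so
low  = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4
high = 0x3FFFFC - (-4) = 0x400000
and the high part is a single add immediate, as promised above. */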
6649 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6650 (((VAL) & ((1 << (N)) - 1)) \
6651 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6656 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6658 /* NEON quad-word load/stores are made of two double-word accesses,
6659 so the valid index range is reduced by 8. Fall back to a 9-bit range if the 10-bit low part would exceed it. */
6661 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6662 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6664 else if (GET_MODE_SIZE (mode) == 8)
6667 low = (TARGET_THUMB2
6668 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6669 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6671 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6672 to access doublewords. The supported load/store offsets are
6673 -8, -4, and 4, which we try to produce here. */
6674 low = ((val & 0xf) ^ 0x8) - 0x8;
6676 else if (GET_MODE_SIZE (mode) < 8)
6678 /* NEON element load/stores do not have an offset. */
6679 if (TARGET_NEON_FP16 && mode == HFmode)
6684 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6685 Try the wider 12-bit range first, and re-try if the result is out of range. */
6687 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6689 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6693 if (mode == HImode || mode == HFmode)
6696 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6699 /* The storehi/movhi_bytes fallbacks can use only
6700 [-4094,+4094] of the full ldrb/strb index range. */
6701 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6702 if (low == 4095 || low == -4095)
6707 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6713 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6714 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6715 - (unsigned HOST_WIDE_INT) 0x80000000);
6716 /* Check for overflow or zero. */
6717 if (low == 0 || high == 0 || (high + low != val))
6720 /* Reload the high part into a base reg; leave the low part in the address. */
6722 *p = gen_rtx_PLUS (GET_MODE (*p),
6723 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6726 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6727 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6728 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6736 thumb_legitimize_reload_address (rtx *x_p,
6737 enum machine_mode mode,
6738 int opnum, int type,
6739 int ind_levels ATTRIBUTE_UNUSED)
6743 if (GET_CODE (x) == PLUS
6744 && GET_MODE_SIZE (mode) < 4
6745 && REG_P (XEXP (x, 0))
6746 && XEXP (x, 0) == stack_pointer_rtx
6747 && GET_CODE (XEXP (x, 1)) == CONST_INT
6748 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6753 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6754 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6758 /* If both registers are hi-regs, then it's better to reload the
6759 entire expression rather than each register individually. That
6760 only requires one reload register rather than two. */
6761 if (GET_CODE (x) == PLUS
6762 && REG_P (XEXP (x, 0))
6763 && REG_P (XEXP (x, 1))
6764 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6765 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6770 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6771 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6778 /* Test for various thread-local symbols. */
6780 /* Return TRUE if X is a thread-local symbol. */
6783 arm_tls_symbol_p (rtx x)
6785 if (! TARGET_HAVE_TLS)
6788 if (GET_CODE (x) != SYMBOL_REF)
6791 return SYMBOL_REF_TLS_MODEL (x) != 0;
6794 /* Helper for arm_tls_referenced_p. */
6797 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6799 if (GET_CODE (*x) == SYMBOL_REF)
6800 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6802 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6803 TLS offsets, not real symbol references. */
6804 if (GET_CODE (*x) == UNSPEC
6805 && XINT (*x, 1) == UNSPEC_TLS)
6811 /* Return TRUE if X contains any TLS symbol references. */
6814 arm_tls_referenced_p (rtx x)
6816 if (! TARGET_HAVE_TLS)
6819 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6822 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6824 On the ARM, allow any integer (invalid ones are removed later by insn
6825 patterns), nice doubles and symbol_refs which refer to the function's constant pool.
6828 When generating pic allow anything. */
6831 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6833 /* At present, we have no support for Neon structure constants, so forbid
6834 them here. It might be possible to handle simple cases like 0 and -1 in future. */
6836 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6839 return flag_pic || !label_mentioned_p (x);
6843 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6845 return (GET_CODE (x) == CONST_INT
6846 || GET_CODE (x) == CONST_DOUBLE
6847 || CONSTANT_ADDRESS_P (x)
6852 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6854 return (!arm_cannot_force_const_mem (mode, x)
6856 ? arm_legitimate_constant_p_1 (mode, x)
6857 : thumb_legitimate_constant_p (mode, x)));
6860 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6863 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6867 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6869 split_const (x, &base, &offset);
6870 if (GET_CODE (base) == SYMBOL_REF
6871 && !offset_within_block_p (base, INTVAL (offset)))
6874 return arm_tls_referenced_p (x);
6877 #define REG_OR_SUBREG_REG(X) \
6878 (GET_CODE (X) == REG \
6879 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6881 #define REG_OR_SUBREG_RTX(X) \
6882 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6885 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6887 enum machine_mode mode = GET_MODE (x);
6901 return COSTS_N_INSNS (1);
6904 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6907 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6914 return COSTS_N_INSNS (2) + cycles;
6916 return COSTS_N_INSNS (1) + 16;
6919 return (COSTS_N_INSNS (1)
6920 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6921 + (GET_CODE (SET_DEST (x)) == MEM)));
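/* E.g. (an illustrative note; COSTS_N_INSNS (N) expands to N * 4): a
register-to-register SET costs COSTS_N_INSNS (1), while a SET whose
source and destination are both MEM costs 4 + 4 * 2 = 12, i.e. the
equivalent of three insns. */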
6926 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6928 if (thumb_shiftable_const (INTVAL (x)))
6929 return COSTS_N_INSNS (2);
6930 return COSTS_N_INSNS (3);
6932 else if ((outer == PLUS || outer == COMPARE)
6933 && INTVAL (x) < 256 && INTVAL (x) > -256)
6935 else if ((outer == IOR || outer == XOR || outer == AND)
6936 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6937 return COSTS_N_INSNS (1);
6938 else if (outer == AND)
6941 /* This duplicates the tests in the andsi3 expander. */
6942 for (i = 9; i <= 31; i++)
6943 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6944 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6945 return COSTS_N_INSNS (2);
6947 else if (outer == ASHIFT || outer == ASHIFTRT
6948 || outer == LSHIFTRT)
6950 return COSTS_N_INSNS (2);
6956 return COSTS_N_INSNS (3);
6974 /* XXX another guess. */
6975 /* Memory costs quite a lot for the first word, but subsequent words
6976 load at the equivalent of a single insn each. */
6977 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6978 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6983 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6989 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6990 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6996 return total + COSTS_N_INSNS (1);
6998 /* Assume a two-shift sequence. Increase the cost slightly so
6999 we prefer actual shifts over an extend operation. */
7000 return total + 1 + COSTS_N_INSNS (2);
7008 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7010 enum machine_mode mode = GET_MODE (x);
7011 enum rtx_code subcode;
7013 enum rtx_code code = GET_CODE (x);
7019 /* Memory costs quite a lot for the first word, but subsequent words
7020 load at the equivalent of a single insn each. */
7021 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7028 if (TARGET_HARD_FLOAT && mode == SFmode)
7029 *total = COSTS_N_INSNS (2);
7030 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7031 *total = COSTS_N_INSNS (4);
7033 *total = COSTS_N_INSNS (20);
7037 if (GET_CODE (XEXP (x, 1)) == REG)
7038 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7039 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7040 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7046 *total += COSTS_N_INSNS (4);
7051 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7052 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7055 *total += COSTS_N_INSNS (3);
7059 *total += COSTS_N_INSNS (1);
7060 /* Increase the cost of complex shifts because they aren't any faster,
7061 and reduce dual issue opportunities. */
7062 if (arm_tune_cortex_a9
7063 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7071 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7072 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7073 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7075 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7079 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7080 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7082 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7089 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7091 if (TARGET_HARD_FLOAT
7093 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7095 *total = COSTS_N_INSNS (1);
7096 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7097 && arm_const_double_rtx (XEXP (x, 0)))
7099 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7103 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7104 && arm_const_double_rtx (XEXP (x, 1)))
7106 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7112 *total = COSTS_N_INSNS (20);
7116 *total = COSTS_N_INSNS (1);
7117 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7118 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7120 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7124 subcode = GET_CODE (XEXP (x, 1));
7125 if (subcode == ASHIFT || subcode == ASHIFTRT
7126 || subcode == LSHIFTRT
7127 || subcode == ROTATE || subcode == ROTATERT)
7129 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7130 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7134 /* A shift as a part of RSB costs no more than RSB itself. */
7135 if (GET_CODE (XEXP (x, 0)) == MULT
7136 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7139 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7144 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7146 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7147 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7151 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7152 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7154 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7155 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7156 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7157 *total += COSTS_N_INSNS (1);
7165 if (code == PLUS && arm_arch6 && mode == SImode
7166 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7167 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7169 *total = COSTS_N_INSNS (1);
7170 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7172 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7176 /* MLA: All arguments must be registers. We filter out
7177 multiplication by a power of two, so that we fall down into the code below. */
7179 if (GET_CODE (XEXP (x, 0)) == MULT
7180 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7182 /* The cost comes from the cost of the multiply. */
7186 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7188 if (TARGET_HARD_FLOAT
7190 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7192 *total = COSTS_N_INSNS (1);
7193 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7194 && arm_const_double_rtx (XEXP (x, 1)))
7196 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7203 *total = COSTS_N_INSNS (20);
7207 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7208 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7210 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7211 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7212 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7213 *total += COSTS_N_INSNS (1);
7219 case AND: case XOR: case IOR:
7221 /* Normally the frame registers will be spilt into reg+const during
7222 reload, so it is a bad idea to combine them with other instructions,
7223 since then they might not be moved outside of loops. As a compromise
7224 we allow integration with ops that have a constant as their second operand. */
7226 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7227 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7228 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7229 *total = COSTS_N_INSNS (1);
7233 *total += COSTS_N_INSNS (2);
7234 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7235 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7237 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7244 *total += COSTS_N_INSNS (1);
7245 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7246 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7248 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7251 subcode = GET_CODE (XEXP (x, 0));
7252 if (subcode == ASHIFT || subcode == ASHIFTRT
7253 || subcode == LSHIFTRT
7254 || subcode == ROTATE || subcode == ROTATERT)
7256 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7257 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7262 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7264 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7265 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7269 if (subcode == UMIN || subcode == UMAX
7270 || subcode == SMIN || subcode == SMAX)
7272 *total = COSTS_N_INSNS (3);
7279 /* This should have been handled by the CPU specific routines. */
7283 if (arm_arch3m && mode == SImode
7284 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7286 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7287 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7288 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7289 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7291 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7294 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7298 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7300 if (TARGET_HARD_FLOAT
7302 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7304 *total = COSTS_N_INSNS (1);
7307 *total = COSTS_N_INSNS (2);
7313 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7314 if (mode == SImode && code == NOT)
7316 subcode = GET_CODE (XEXP (x, 0));
7317 if (subcode == ASHIFT || subcode == ASHIFTRT
7318 || subcode == LSHIFTRT
7319 || subcode == ROTATE || subcode == ROTATERT
7321 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7323 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7324 /* Register shifts cost an extra cycle. */
7325 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7326 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7335 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7337 *total = COSTS_N_INSNS (4);
7341 operand = XEXP (x, 0);
7343 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7344 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7345 && GET_CODE (XEXP (operand, 0)) == REG
7346 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7347 *total += COSTS_N_INSNS (1);
7348 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7349 + rtx_cost (XEXP (x, 2), code, 2, speed));
7353 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7355 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7361 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7362 && mode == SImode && XEXP (x, 1) == const0_rtx)
7364 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7370 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7371 && mode == SImode && XEXP (x, 1) == const0_rtx)
7373 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7393 /* SCC insns. If the comparison has already been
7394 performed, they cost 2 instructions. Otherwise they need
7395 an additional comparison before them. */
7396 *total = COSTS_N_INSNS (2);
7397 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7404 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7410 *total += COSTS_N_INSNS (1);
7411 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7412 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7418 subcode = GET_CODE (XEXP (x, 0));
7419 if (subcode == ASHIFT || subcode == ASHIFTRT
7420 || subcode == LSHIFTRT
7421 || subcode == ROTATE || subcode == ROTATERT)
7423 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7424 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7429 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7431 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7442 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7443 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7444 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7445 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7449 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7451 if (TARGET_HARD_FLOAT
7453 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7455 *total = COSTS_N_INSNS (1);
7458 *total = COSTS_N_INSNS (20);
7461 *total = COSTS_N_INSNS (1);
7463 *total += COSTS_N_INSNS (3);
7469 if (GET_MODE_CLASS (mode) == MODE_INT)
7471 rtx op = XEXP (x, 0);
7472 enum machine_mode opmode = GET_MODE (op);
7475 *total += COSTS_N_INSNS (1);
7477 if (opmode != SImode)
7481 /* If !arm_arch4, we use one of the extendhisi2_mem
7482 or movhi_bytes patterns for HImode. For a QImode
7483 sign extension, we first zero-extend from memory
7484 and then perform a shift sequence. */
7485 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7486 *total += COSTS_N_INSNS (2);
7489 *total += COSTS_N_INSNS (1);
7491 /* We don't have the necessary insn, so we need to perform some other operation. */
7493 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7494 /* An and with constant 255. */
7495 *total += COSTS_N_INSNS (1);
7497 /* A shift sequence. Increase costs slightly to avoid
7498 combining two shifts into an extend operation. */
7499 *total += COSTS_N_INSNS (2) + 1;
7505 switch (GET_MODE (XEXP (x, 0)))
7512 *total = COSTS_N_INSNS (1);
7522 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7526 if (const_ok_for_arm (INTVAL (x))
7527 || const_ok_for_arm (~INTVAL (x)))
7528 *total = COSTS_N_INSNS (1);
7530 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7531 INTVAL (x), NULL_RTX,
7538 *total = COSTS_N_INSNS (3);
7542 *total = COSTS_N_INSNS (1);
7546 *total = COSTS_N_INSNS (1);
7547 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7551 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7552 && (mode == SFmode || !TARGET_VFP_SINGLE))
7553 *total = COSTS_N_INSNS (1);
7555 *total = COSTS_N_INSNS (4);
7562 *total = COSTS_N_INSNS (4);
7567 /* Estimates the size cost of thumb1 instructions.
7568 For now most of the code is copied from thumb1_rtx_costs. We need more
7569 finer-grained tuning when we have more related test cases. */
7571 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7573 enum machine_mode mode = GET_MODE (x);
7586 return COSTS_N_INSNS (1);
7589 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7591 /* Thumb1 mul instruction can't operate on const. We must load it
7592 into a register first. */
7593 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7594 return COSTS_N_INSNS (1) + const_size;
7596 return COSTS_N_INSNS (1);
7599 return (COSTS_N_INSNS (1)
7600 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7601 + (GET_CODE (SET_DEST (x)) == MEM)));
7606 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7607 return COSTS_N_INSNS (1);
7608 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7609 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7610 return COSTS_N_INSNS (2);
7611 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7612 if (thumb_shiftable_const (INTVAL (x)))
7613 return COSTS_N_INSNS (2);
7614 return COSTS_N_INSNS (3);
7616 else if ((outer == PLUS || outer == COMPARE)
7617 && INTVAL (x) < 256 && INTVAL (x) > -256)
7619 else if ((outer == IOR || outer == XOR || outer == AND)
7620 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7621 return COSTS_N_INSNS (1);
7622 else if (outer == AND)
7625 /* This duplicates the tests in the andsi3 expander. */
7626 for (i = 9; i <= 31; i++)
7627 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7628 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7629 return COSTS_N_INSNS (2);
7631 else if (outer == ASHIFT || outer == ASHIFTRT
7632 || outer == LSHIFTRT)
7634 return COSTS_N_INSNS (2);
7640 return COSTS_N_INSNS (3);
7658 /* XXX another guess. */
7659 /* Memory costs quite a lot for the first word, but subsequent words
7660 load at the equivalent of a single insn each. */
7661 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7662 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7667 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7672 /* XXX still guessing. */
7673 switch (GET_MODE (XEXP (x, 0)))
7676 return (1 + (mode == DImode ? 4 : 0)
7677 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7680 return (4 + (mode == DImode ? 4 : 0)
7681 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7684 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7695 /* RTX costs when optimizing for size. */
7697 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7700 enum machine_mode mode = GET_MODE (x);
7703 *total = thumb1_size_rtx_costs (x, code, outer_code);
7707 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7711 /* A memory access costs 1 insn if the mode is small, or the address is
7712 a single register, otherwise it costs one insn per word. */
7713 if (REG_P (XEXP (x, 0)))
7714 *total = COSTS_N_INSNS (1);
7716 && GET_CODE (XEXP (x, 0)) == PLUS
7717 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7718 /* This will be split into two instructions.
7719 See arm.md:calculate_pic_address. */
7720 *total = COSTS_N_INSNS (2);
7722 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7729 /* Needs a libcall, so it costs about this. */
7730 *total = COSTS_N_INSNS (2);
7734 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7736 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7744 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7746 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7749 else if (mode == SImode)
7751 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7752 /* Slightly disparage register shifts, but not by much. */
7753 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7754 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7758 /* Needs a libcall. */
7759 *total = COSTS_N_INSNS (2);
7763 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7764 && (mode == SFmode || !TARGET_VFP_SINGLE))
7766 *total = COSTS_N_INSNS (1);
7772 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7773 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7775 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7776 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7777 || subcode1 == ROTATE || subcode1 == ROTATERT
7778 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7779 || subcode1 == ASHIFTRT)
7781 /* It's just the cost of the two operands. */
7786 *total = COSTS_N_INSNS (1);
7790 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7794 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7795 && (mode == SFmode || !TARGET_VFP_SINGLE))
7797 *total = COSTS_N_INSNS (1);
7801 /* A shift as a part of ADD costs nothing. */
7802 if (GET_CODE (XEXP (x, 0)) == MULT
7803 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7805 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7806 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7807 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7812 case AND: case XOR: case IOR:
7815 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7817 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7818 || subcode == LSHIFTRT || subcode == ASHIFTRT
7819 || (code == AND && subcode == NOT))
7821 /* It's just the cost of the two operands. */
7827 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7831 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7835 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7836 && (mode == SFmode || !TARGET_VFP_SINGLE))
7838 *total = COSTS_N_INSNS (1);
7844 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7853 if (cc_register (XEXP (x, 0), VOIDmode))
7856 *total = COSTS_N_INSNS (1);
7860 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7861 && (mode == SFmode || !TARGET_VFP_SINGLE))
7862 *total = COSTS_N_INSNS (1);
7864 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7869 return arm_rtx_costs_1 (x, outer_code, total, 0);
7872 if (const_ok_for_arm (INTVAL (x)))
7873 /* A multiplication by a constant requires another instruction
7874 to load the constant to a register. */
7875 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7877 else if (const_ok_for_arm (~INTVAL (x)))
7878 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7879 else if (const_ok_for_arm (-INTVAL (x)))
7881 if (outer_code == COMPARE || outer_code == PLUS
7882 || outer_code == MINUS)
7885 *total = COSTS_N_INSNS (1);
7888 *total = COSTS_N_INSNS (2);
7894 *total = COSTS_N_INSNS (2);
7898 *total = COSTS_N_INSNS (4);
7903 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7904 cost of these slightly. */
7905 *total = COSTS_N_INSNS (1) + 1;
7912 if (mode != VOIDmode)
7913 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7915 *total = COSTS_N_INSNS (4); /* Who knows? */
7920 /* Implement the TARGET_RTX_COSTS hook: dispatch to the size-based or the per-core speed-based cost functions. */
7922 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7923 int *total, bool speed)
7926 return arm_size_rtx_costs (x, (enum rtx_code) code,
7927 (enum rtx_code) outer_code, total);
7929 return current_tune->rtx_costs (x, (enum rtx_code) code,
7930 (enum rtx_code) outer_code,
7934 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7935 supported on any "slowmul" cores, so it can be ignored. */
7938 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7939 int *total, bool speed)
7941 enum machine_mode mode = GET_MODE (x);
7945 *total = thumb1_rtx_costs (x, code, outer_code);
7952 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7955 *total = COSTS_N_INSNS (20);
7959 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7961 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7962 & (unsigned HOST_WIDE_INT) 0xffffffff);
7963 int cost, const_ok = const_ok_for_arm (i);
7964 int j, booth_unit_size;
7966 /* Tune as appropriate. */
7967 cost = const_ok ? 4 : 8;
7968 booth_unit_size = 2;
7969 for (j = 0; i && j < 32; j += booth_unit_size)
7971 i >>= booth_unit_size;
7975 *total = COSTS_N_INSNS (cost);
7976 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7980 *total = COSTS_N_INSNS (20);
7984 return arm_rtx_costs_1 (x, outer_code, total, speed);
7989 /* RTX cost for cores with a fast multiply unit (M variants). */
7992 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7993 int *total, bool speed)
7995 enum machine_mode mode = GET_MODE (x);
7999 *total = thumb1_rtx_costs (x, code, outer_code);
8003 /* ??? Should Thumb-2 use different costs? */
8007 /* There is no point basing this on the tuning, since it is always the
8008 fast variant if it exists at all. */
8010 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8011 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8012 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8014 *total = COSTS_N_INSNS (2);
8021 *total = COSTS_N_INSNS (5);
8025 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8027 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8028 & (unsigned HOST_WIDE_INT) 0xffffffff);
8029 int cost, const_ok = const_ok_for_arm (i);
8030 int j, booth_unit_size;
8032 /* Tune as appropriate. */
8033 cost = const_ok ? 4 : 8;
8034 booth_unit_size = 8;
8035 for (j = 0; i && j < 32; j += booth_unit_size)
8037 i >>= booth_unit_size;
8041 *total = COSTS_N_INSNS (cost);
8047 *total = COSTS_N_INSNS (4);
8051 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8053 if (TARGET_HARD_FLOAT
8055 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8057 *total = COSTS_N_INSNS (1);
8062 /* Requires a libcall. */
8063 *total = COSTS_N_INSNS (20);
8067 return arm_rtx_costs_1 (x, outer_code, total, speed);
8072 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8073 so it can be ignored. */
8076 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8077 int *total, bool speed)
8079 enum machine_mode mode = GET_MODE (x);
8083 *total = thumb1_rtx_costs (x, code, outer_code);
8090 if (GET_CODE (XEXP (x, 0)) != MULT)
8091 return arm_rtx_costs_1 (x, outer_code, total, speed);
8093 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8094 will stall until the multiplication is complete. */
8095 *total = COSTS_N_INSNS (3);
8099 /* There is no point basing this on the tuning, since it is always the
8100 fast variant if it exists at all. */
8102 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8103 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8104 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8106 *total = COSTS_N_INSNS (2);
8113 *total = COSTS_N_INSNS (5);
8117 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8119 /* If operand 1 is a constant we can more accurately
8120 calculate the cost of the multiply. The multiplier can
8121 retire 15 bits on the first cycle and a further 12 on the
8122 second. We do, of course, have to load the constant into
8123 a register first. */
8124 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8125 /* There's a general overhead of one cycle. */
8127 unsigned HOST_WIDE_INT masked_const;
8132 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8134 masked_const = i & 0xffff8000;
8135 if (masked_const != 0)
8138 masked_const = i & 0xf8000000;
8139 if (masked_const != 0)
8142 *total = COSTS_N_INSNS (cost);
8148 *total = COSTS_N_INSNS (3);
8152 /* Requires a libcall. */
8153 *total = COSTS_N_INSNS (20);
8157 return arm_rtx_costs_1 (x, outer_code, total, speed);
8162 /* RTX costs for 9e (and later) cores. */
8165 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8166 int *total, bool speed)
8168 enum machine_mode mode = GET_MODE (x);
8175 *total = COSTS_N_INSNS (3);
8179 *total = thumb1_rtx_costs (x, code, outer_code);
8187 /* There is no point basing this on the tuning, since it is always the
8188 fast variant if it exists at all. */
8190 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8191 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8192 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8194 *total = COSTS_N_INSNS (2);
8201 *total = COSTS_N_INSNS (5);
8207 *total = COSTS_N_INSNS (2);
8211 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8213 if (TARGET_HARD_FLOAT
8215 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8217 *total = COSTS_N_INSNS (1);
8222 *total = COSTS_N_INSNS (20);
8226 return arm_rtx_costs_1 (x, outer_code, total, speed);
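/* Wiring sketch (illustrative only; the tune_params tables themselves
are defined earlier in this file): each core reaches one of the cost
routines above through current_tune->rtx_costs, e.g. a "slowmul" core's
tuning table points that hook at arm_slowmul_rtx_costs. */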
8229 /* All address computations that can be done are free, but rtx cost returns
8230 the same for practically all of them. So we weight the different types
8231 of address here in the order (most preferred first):
8232 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8234 arm_arm_address_cost (rtx x)
8236 enum rtx_code c = GET_CODE (x);
8238 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8240 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8245 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8248 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8258 arm_thumb_address_cost (rtx x)
8260 enum rtx_code c = GET_CODE (x);
8265 && GET_CODE (XEXP (x, 0)) == REG
8266 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8273 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8275 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8278 /* Adjust cost hook for XScale. */
8280 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8282 /* Some true dependencies can have a higher cost depending
8283 on precisely how certain input operands are used. */
8284 if (REG_NOTE_KIND (link) == 0
8285 && recog_memoized (insn) >= 0
8286 && recog_memoized (dep) >= 0)
8288 int shift_opnum = get_attr_shift (insn);
8289 enum attr_type attr_type = get_attr_type (dep);
8291 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8292 operand for INSN. If we have a shifted input operand and the
8293 instruction we depend on is another ALU instruction, then we may
8294 have to account for an additional stall. */
8295 if (shift_opnum != 0
8296 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8298 rtx shifted_operand;
8301 /* Get the shifted operand. */
8302 extract_insn (insn);
8303 shifted_operand = recog_data.operand[shift_opnum];
8305 /* Iterate over all the operands in DEP. If we write an operand
8306 that overlaps with SHIFTED_OPERAND, then we have to increase the
8307 cost of this dependency. */
8309 preprocess_constraints ();
8310 for (opno = 0; opno < recog_data.n_operands; opno++)
8312 /* We can ignore strict inputs. */
8313 if (recog_data.operand_type[opno] == OP_IN)
8316 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8328 /* Adjust cost hook for Cortex A9. */
8330 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8332 switch (REG_NOTE_KIND (link))
8339 case REG_DEP_OUTPUT:
8340 if (recog_memoized (insn) >= 0
8341 && recog_memoized (dep) >= 0)
8343 if (GET_CODE (PATTERN (insn)) == SET)
8346 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8348 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8350 enum attr_type attr_type_insn = get_attr_type (insn);
8351 enum attr_type attr_type_dep = get_attr_type (dep);
8353 /* By default all dependencies of the form
8356 have an extra latency of 1 cycle because
8357 of the input and output dependency in this
8358 case. However this gets modeled as a true
8359 dependency and hence all these checks. */
8360 if (REG_P (SET_DEST (PATTERN (insn)))
8361 && REG_P (SET_DEST (PATTERN (dep)))
8362 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8363 SET_DEST (PATTERN (dep))))
8365 /* FMACS is a special case where the dependent
8366 instruction can be issued 3 cycles before
8367 the normal latency in case of an output
8369 if ((attr_type_insn == TYPE_FMACS
8370 || attr_type_insn == TYPE_FMACD)
8371 && (attr_type_dep == TYPE_FMACS
8372 || attr_type_dep == TYPE_FMACD))
8374 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8375 *cost = insn_default_latency (dep) - 3;
8377 *cost = insn_default_latency (dep);
8382 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8383 *cost = insn_default_latency (dep) + 1;
8385 *cost = insn_default_latency (dep);
8401 /* Adjust cost hook for FA726TE. */
8403 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8405 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8406 has a penalty of 3. */
8407 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8408 && recog_memoized (insn) >= 0
8409 && recog_memoized (dep) >= 0
8410 && get_attr_conds (dep) == CONDS_SET)
8412 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8413 if (get_attr_conds (insn) == CONDS_USE
8414 && get_attr_type (insn) != TYPE_BRANCH)
8420 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8421 || get_attr_conds (insn) == CONDS_USE)
8431 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8432 It corrects the value of COST based on the relationship between
8433 INSN and DEP through the dependence LINK. It returns the new
8434 value. There is a per-core adjust_cost hook to adjust scheduler costs
8435 and the per-core hook can choose to completely override the generic
8436 adjust_cost function. Only put bits of code into arm_adjust_cost that
8437 are common across all cores. */
8439 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8443 /* When generating Thumb-1 code, we want to place flag-setting operations
8444 close to a conditional branch which depends on them, so that we can
8445 omit the comparison. */
8447 && REG_NOTE_KIND (link) == 0
8448 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8449 && recog_memoized (dep) >= 0
8450 && get_attr_conds (dep) == CONDS_SET)
8453 if (current_tune->sched_adjust_cost != NULL)
8455 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8459 /* XXX This is not strictly true for the FPA. */
8460 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8461 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8464 /* Call insns don't incur a stall, even if they follow a load. */
8465 if (REG_NOTE_KIND (link) == 0
8466 && GET_CODE (insn) == CALL_INSN)
8469 if ((i_pat = single_set (insn)) != NULL
8470 && GET_CODE (SET_SRC (i_pat)) == MEM
8471 && (d_pat = single_set (dep)) != NULL
8472 && GET_CODE (SET_DEST (d_pat)) == MEM)
8474 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8475 /* This is a load after a store, there is no conflict if the load reads
8476 from a cached area. Assume that loads from the stack, and from the
8477 constant pool are cached, and that others will miss. This is a rough approximation. */
8480 if ((GET_CODE (src_mem) == SYMBOL_REF
8481 && CONSTANT_POOL_ADDRESS_P (src_mem))
8482 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8483 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8484 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8492 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8495 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8497 return (optimize > 0) ? 2 : 0;
8501 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8503 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8506 static int fp_consts_inited = 0;
8508 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8509 static const char * const strings_fp[8] =
8512 "4", "5", "0.5", "10"
8515 static REAL_VALUE_TYPE values_fp[8];
8518 init_fp_table (void)
8524 fp_consts_inited = 1;
8526 fp_consts_inited = 8;
8528 for (i = 0; i < fp_consts_inited; i++)
8530 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8535 /* Return TRUE if rtx X is a valid immediate FP constant. */
8537 arm_const_double_rtx (rtx x)
8542 if (!fp_consts_inited)
8545 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8546 if (REAL_VALUE_MINUS_ZERO (r))
8549 for (i = 0; i < fp_consts_inited; i++)
8550 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8556 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8558 neg_const_double_rtx_ok_for_fpa (rtx x)
8563 if (!fp_consts_inited)
8566 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8567 r = real_value_negate (&r);
8568 if (REAL_VALUE_MINUS_ZERO (r))
8571 for (i = 0; i < 8; i++)
8572 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8579 /* VFPv3 has a fairly wide range of representable immediates, formed from
8580 "quarter-precision" floating-point values. These can be evaluated using this
8581 formula (with ^ for exponentiation):

valid = (-1)^s * n * 2^-r

8585 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8586 16 <= n <= 31 and 0 <= r <= 7.
8588 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8590 - A (most-significant) is the sign bit.
8591 - BCD are the exponent (encoded as r XOR 3).
8592 - EFGH are the mantissa (encoded as n - 16).
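/* Worked example (illustration only): +1.0 is 16 * 2^-4, i.e. s = 0,
n = 16 and r = 4, so A = 0, BCD = (4 XOR 3) = 111 and
EFGH = (16 - 16) = 0000, giving the 8-bit encoding 01110000 == 0x70. */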
8595 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8596 fconst[sd] instruction, or -1 if X isn't suitable. */
8598 vfp3_const_double_index (rtx x)
8600 REAL_VALUE_TYPE r, m;
8602 unsigned HOST_WIDE_INT mantissa, mant_hi;
8603 unsigned HOST_WIDE_INT mask;
8604 HOST_WIDE_INT m1, m2;
8605 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8607 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8610 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8612 /* We can't represent these things, so detect them first. */
8613 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8616 /* Extract sign, exponent and mantissa. */
8617 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8618 r = real_value_abs (&r);
8619 exponent = REAL_EXP (&r);
8620 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8621 highest (sign) bit, with a fixed binary point at bit point_pos.
8622 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8623 bits for the mantissa, this may fail (low bits would be lost). */
8624 real_ldexp (&m, &r, point_pos - exponent);
8625 REAL_VALUE_TO_INT (&m1, &m2, m);
8629 /* If there are bits set in the low part of the mantissa, we can't
8630 represent this value. */
8634 /* Now make it so that mantissa contains the most-significant bits, and move
8635 the point_pos to indicate that the least-significant bits have been discarded. */
8637 point_pos -= HOST_BITS_PER_WIDE_INT;
8640 /* We can permit four significant bits of mantissa only, plus a high bit
8641 which is always 1. */
8642 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8643 if ((mantissa & mask) != 0)
8646 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8647 mantissa >>= point_pos - 5;
8649 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8650 floating-point immediate zero with Neon using an integer-zero load, but
8651 that case is handled elsewhere.) */
8655 gcc_assert (mantissa >= 16 && mantissa <= 31);
8657 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8658 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8659 left 4 places at this point relative to normalized IEEE754 values). GCC
8660 internally uses [0.5, 1) (see real.c), so the exponent returned from
8661 REAL_EXP must be altered. */
8662 exponent = 5 - exponent;
8664 if (exponent < 0 || exponent > 7)
8667 /* Sign, mantissa and exponent are now in the correct form to plug into the
8668 formula described in the comment above. */
8669 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
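/* Tracing +1.0 through the code above (illustration only): REAL_EXP
returns 1, since GCC keeps significands in [0.5, 1), so exponent becomes
5 - 1 = 4 and mantissa 16, and the return value is
(0 << 7) | ((4 ^ 3) << 4) | 0 == 0x70. */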
8672 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8674 vfp3_const_double_rtx (rtx x)
8679 return vfp3_const_double_index (x) != -1;
8682 /* Recognize immediates which can be used in various Neon instructions. Legal
8683 immediates are described by the following table (for VMVN variants, the
8684 bitwise inverse of the constant shown is recognized. In either case, VMOV
8685 is output and the correct instruction to use for a given constant is chosen
8686 by the assembler). The constant shown is replicated across all elements of
8687 the destination vector.
8689 insn elems variant constant (binary)
8690 ---- ----- ------- -----------------
8691 vmov i32 0 00000000 00000000 00000000 abcdefgh
8692 vmov i32 1 00000000 00000000 abcdefgh 00000000
8693 vmov i32 2 00000000 abcdefgh 00000000 00000000
8694 vmov i32 3 abcdefgh 00000000 00000000 00000000
8695 vmov i16 4 00000000 abcdefgh
8696 vmov i16 5 abcdefgh 00000000
8697 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8698 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8699 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8700 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8701 vmvn i16 10 00000000 abcdefgh
8702 vmvn i16 11 abcdefgh 00000000
8703 vmov i32 12 00000000 00000000 abcdefgh 11111111
8704 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8705 vmov i32 14 00000000 abcdefgh 11111111 11111111
8706 vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
8708 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8709 eeeeeeee ffffffff gggggggg hhhhhhhh
8710 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8712 For case 18, B = !b. Representable values are exactly those accepted by
8713 vfp3_const_double_index, but are output as floating-point numbers rather
8716 Variants 0-5 (inclusive) may also be used as immediates for the second
8717 operand of VORR/VBIC instructions.
8719 The INVERSE argument causes the bitwise inverse of the given operand to be
8720 recognized instead (used for recognizing legal immediates for the VAND/VORN
8721 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8722 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8723 output, rather than the real insns vbic/vorr).
8725 INVERSE makes no difference to the recognition of float vectors.
8727 The return value is the variant of immediate as shown in the above table, or
8728 -1 if the given value doesn't match any of the listed patterns.
8731 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8732 rtx *modconst, int *elementwidth)
8734 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8736 for (i = 0; i < idx; i += (STRIDE)) \
8741 immtype = (CLASS); \
8742 elsize = (ELSIZE); \
8746 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8747 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8748 unsigned char bytes[16];
8749 int immtype = -1, matches;
8750 unsigned int invmask = inverse ? 0xff : 0;
8752 /* Vectors of float constants. */
8753 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8755 rtx el0 = CONST_VECTOR_ELT (op, 0);
8758 if (!vfp3_const_double_rtx (el0))
8761 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8763 for (i = 1; i < n_elts; i++)
8765 rtx elt = CONST_VECTOR_ELT (op, i);
8768 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8770 if (!REAL_VALUES_EQUAL (r0, re))
8775 *modconst = CONST_VECTOR_ELT (op, 0);
8783 /* Splat vector constant out into a byte vector. */
8784 for (i = 0; i < n_elts; i++)
8786 rtx el = CONST_VECTOR_ELT (op, i);
8787 unsigned HOST_WIDE_INT elpart;
8788 unsigned int part, parts;
8790 if (GET_CODE (el) == CONST_INT)
8792 elpart = INTVAL (el);
8795 else if (GET_CODE (el) == CONST_DOUBLE)
8797 elpart = CONST_DOUBLE_LOW (el);
8803 for (part = 0; part < parts; part++)
8806 for (byte = 0; byte < innersize; byte++)
8808 bytes[idx++] = (elpart & 0xff) ^ invmask;
8809 elpart >>= BITS_PER_UNIT;
8811 if (GET_CODE (el) == CONST_DOUBLE)
8812 elpart = CONST_DOUBLE_HIGH (el);
8817 gcc_assert (idx == GET_MODE_SIZE (mode));
8821 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8822 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8824 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8825 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8827 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8828 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8830 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8831 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8833 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8835 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8837 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8838 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8840 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8841 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8843 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8844 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8846 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8847 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8849 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8851 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8853 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8854 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8856 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8859 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8860 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8862 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8863 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8865 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8867 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8868 && bytes[i] == bytes[(i + 8) % idx]);
8876 *elementwidth = elsize;
8880 unsigned HOST_WIDE_INT imm = 0;
8882 /* Un-invert bytes of recognized vector, if necessary. */
8884 for (i = 0; i < idx; i++)
8885 bytes[i] ^= invmask;
8889 /* FIXME: Broken on 32-bit H_W_I hosts. */
8890 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8892 for (i = 0; i < 8; i++)
8893 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8894 << (i * BITS_PER_UNIT);
8896 *modconst = GEN_INT (imm);
8900 unsigned HOST_WIDE_INT imm = 0;
8902 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8903 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8905 *modconst = GEN_INT (imm);
8913 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8914 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8915 float elements), and a modified constant (whatever should be output for a
8916 VMOV) in *MODCONST. */
8919 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8920 rtx *modconst, int *elementwidth)
8924 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8930 *modconst = tmpconst;
8933 *elementwidth = tmpwidth;
8938 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8939 the immediate is valid, write a constant suitable for using as an operand
8940 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8941 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8944 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8945 rtx *modconst, int *elementwidth)
8949 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8951 if (retval < 0 || retval > 5)
8955 *modconst = tmpconst;
8958 *elementwidth = tmpwidth;
8963 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
8964 the immediate is valid, write a constant suitable for using as an operand
8965 to VSHR/VSHL to *MODCONST and the corresponding element width to
*ELEMENTWIDTH.  ISLEFTSHIFT selects a left shift rather than a right
shift; the two have different immediate ranges.
8970 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
8971 rtx *modconst, int *elementwidth,
8974 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8975 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
8976 unsigned HOST_WIDE_INT last_elt = 0;
8977 unsigned HOST_WIDE_INT maxshift;
/* Extract each element and check that they all have the same value.  */
8980 for (i = 0; i < n_elts; i++)
8982 rtx el = CONST_VECTOR_ELT (op, i);
8983 unsigned HOST_WIDE_INT elpart;
8985 if (GET_CODE (el) == CONST_INT)
8986 elpart = INTVAL (el);
8987 else if (GET_CODE (el) == CONST_DOUBLE)
8992 if (i != 0 && elpart != last_elt)
/* The permitted shift count is bounded by the element size.  */
8999 maxshift = innersize * 8;
9003 /* Left shift immediate value can be from 0 to <size>-1. */
9004 if (last_elt >= maxshift)
9009 /* Right shift immediate value can be from 1 to <size>. */
9010 if (last_elt == 0 || last_elt > maxshift)
9015 *elementwidth = innersize * 8;
9018 *modconst = CONST_VECTOR_ELT (op, 0);
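/* For example, with V8QI (8-bit elements) the checks above accept a splat
   shift count of 0..7 for VSHL but 1..8 for VSHR, mirroring the ranges the
   instruction encodings provide.  */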
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
9027 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9028 int inverse, int quad)
9030 int width, is_valid;
9031 static char templ[40];
9033 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9035 gcc_assert (is_valid != 0);
9038 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9040 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
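/* E.g. MNEM "vorr" with 32-bit elements and a quad destination produces
   the template "vorr.i32\t%q0, %2".  */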
9045 /* Return a string suitable for output of Neon immediate shift operation
9046 (VSHR or VSHL) MNEM. */
9049 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9050 enum machine_mode mode, int quad,
9053 int width, is_valid;
9054 static char templ[40];
9056 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9057 gcc_assert (is_valid != 0);
9060 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9062 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9067 /* Output a sequence of pairwise operations to implement a reduction.
NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go.  Unfortunately we don't think we can
exploit those extra calculations to do the full operation in fewer steps.
9071 Although all vector elements of the result but the first are ignored, we
9072 actually calculate the same result in each of the elements. An alternative
9073 such as initially loading a vector with zero to use as each of the second
9074 operands would use up an additional register and take an extra instruction,
9075 for no particular gain. */
9078 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9079 rtx (*reduc) (rtx, rtx, rtx))
9081 enum machine_mode inner = GET_MODE_INNER (mode);
9082 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9085 for (i = parts / 2; i >= 1; i /= 2)
9087 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9088 emit_insn (reduc (dest, tmpsum, tmpsum));
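/* A sketch of the expansion: for a four-element vector {a,b,c,d} with
   REDUC a pairwise add, the first iteration yields {a+b, c+d, a+b, c+d}
   and the second {a+b+c+d, ...}, so element 0 of OP0 holds the full
   reduction after log2(nelts) steps.  */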
9093 /* If VALS is a vector constant that can be loaded into a register
9094 using VDUP, generate instructions to do so and return an RTX to
9095 assign to the register. Otherwise return NULL_RTX. */
9098 neon_vdup_constant (rtx vals)
9100 enum machine_mode mode = GET_MODE (vals);
9101 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9102 int n_elts = GET_MODE_NUNITS (mode);
9103 bool all_same = true;
9107 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9110 for (i = 0; i < n_elts; ++i)
9112 x = XVECEXP (vals, 0, i);
9113 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9118 /* The elements are not all the same. We could handle repeating
9119 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
{0, C, 0, C, 0, C, 0, C} which can be loaded using
vmov.i16).  */
9124 /* We can load this constant by using VDUP and a constant in a
single ARM register.  This will be cheaper than a vector
constant pool entry.  */
9128 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9129 return gen_rtx_VEC_DUPLICATE (mode, x);
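/* For instance, a V4SI constant {C, C, C, C} becomes a move of C into a
   core register followed by a single vdup.32, avoiding a literal-pool
   load.  */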
9132 /* Generate code to load VALS, which is a PARALLEL containing only
9133 constants (for vec_init) or CONST_VECTOR, efficiently into a
9134 register. Returns an RTX to copy into the register, or NULL_RTX
for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
9138 neon_make_constant (rtx vals)
9140 enum machine_mode mode = GET_MODE (vals);
9142 rtx const_vec = NULL_RTX;
9143 int n_elts = GET_MODE_NUNITS (mode);
9147 if (GET_CODE (vals) == CONST_VECTOR)
9149 else if (GET_CODE (vals) == PARALLEL)
9151 /* A CONST_VECTOR must contain only CONST_INTs and
9152 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9153 Only store valid constants in a CONST_VECTOR. */
9154 for (i = 0; i < n_elts; ++i)
9156 rtx x = XVECEXP (vals, 0, i);
9157 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9160 if (n_const == n_elts)
9161 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9166 if (const_vec != NULL
9167 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9168 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9170 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9171 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
pipeline cycle; creating the constant takes one or two ARM
pipeline cycles.  */
9175 else if (const_vec != NULL_RTX)
9176 /* Load from constant pool. On Cortex-A8 this takes two cycles
(for either double or quad vectors).  We cannot take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode.  */
9182 /* A PARALLEL containing something not valid inside CONST_VECTOR.
We cannot construct an initializer.  */
9187 /* Initialize vector TARGET to VALS. */
9190 neon_expand_vector_init (rtx target, rtx vals)
9192 enum machine_mode mode = GET_MODE (target);
9193 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9194 int n_elts = GET_MODE_NUNITS (mode);
9195 int n_var = 0, one_var = -1;
9196 bool all_same = true;
9200 for (i = 0; i < n_elts; ++i)
9202 x = XVECEXP (vals, 0, i);
9203 if (!CONSTANT_P (x))
9204 ++n_var, one_var = i;
9206 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9212 rtx constant = neon_make_constant (vals);
9213 if (constant != NULL_RTX)
9215 emit_move_insn (target, constant);
9220 /* Splat a single non-constant element if we can. */
9221 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9223 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9224 emit_insn (gen_rtx_SET (VOIDmode, target,
9225 gen_rtx_VEC_DUPLICATE (mode, x)));
9229 /* One field is non-constant. Load constant then overwrite varying
9230 field. This is more efficient than using the stack. */
9233 rtx copy = copy_rtx (vals);
9234 rtx index = GEN_INT (one_var);
/* Load constant part of vector, substitute neighboring value for
   varying element.  */
9238 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9239 neon_expand_vector_init (target, copy);
9241 /* Insert variable. */
9242 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9246 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9249 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9252 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9255 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9258 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9261 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9264 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9267 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9270 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9278 /* Construct the vector in memory one field at a time
9279 and load the whole vector. */
9280 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9281 for (i = 0; i < n_elts; i++)
9282 emit_move_insn (adjust_address_nv (mem, inner_mode,
9283 i * GET_MODE_SIZE (inner_mode)),
9284 XVECEXP (vals, 0, i));
9285 emit_move_insn (target, mem);
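/* To illustrate the one-variable case above: initializing {x, 1, 2, 3}
   first materializes the constant vector {1, 1, 2, 3} (the varying lane
   borrows its neighbour's value), then overwrites lane 0 with x via the
   appropriate vset_lane pattern.  */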
9288 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9289 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9290 reported source locations are bogus. */
9293 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9298 gcc_assert (GET_CODE (operand) == CONST_INT);
9300 lane = INTVAL (operand);
9302 if (lane < low || lane >= high)
9306 /* Bounds-check lanes. */
9309 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9311 bounds_check (operand, low, high, "lane out of range");
9314 /* Bounds-check constants. */
9317 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9319 bounds_check (operand, low, high, "constant out of range");
9323 neon_element_bits (enum machine_mode mode)
9326 return GET_MODE_BITSIZE (mode);
9328 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9332 /* Predicates for `match_operand' and `match_operator'. */
9334 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9336 cirrus_memory_offset (rtx op)
9338 /* Reject eliminable registers. */
9339 if (! (reload_in_progress || reload_completed)
9340 && ( reg_mentioned_p (frame_pointer_rtx, op)
9341 || reg_mentioned_p (arg_pointer_rtx, op)
9342 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9343 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9344 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9345 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9348 if (GET_CODE (op) == MEM)
9354 /* Match: (mem (reg)). */
9355 if (GET_CODE (ind) == REG)
9361 if (GET_CODE (ind) == PLUS
9362 && GET_CODE (XEXP (ind, 0)) == REG
9363 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9364 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9371 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9372 WB is true if full writeback address modes are allowed and is false
if limited writeback address modes (POST_INC and PRE_DEC) are
allowed.  */
9377 arm_coproc_mem_operand (rtx op, bool wb)
9381 /* Reject eliminable registers. */
9382 if (! (reload_in_progress || reload_completed)
9383 && ( reg_mentioned_p (frame_pointer_rtx, op)
9384 || reg_mentioned_p (arg_pointer_rtx, op)
9385 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9386 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9387 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9388 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9391 /* Constants are converted into offsets from labels. */
9392 if (GET_CODE (op) != MEM)
9397 if (reload_completed
9398 && (GET_CODE (ind) == LABEL_REF
9399 || (GET_CODE (ind) == CONST
9400 && GET_CODE (XEXP (ind, 0)) == PLUS
9401 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9402 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9405 /* Match: (mem (reg)). */
9406 if (GET_CODE (ind) == REG)
9407 return arm_address_register_rtx_p (ind, 0);
/* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9410 acceptable in any case (subject to verification by
9411 arm_address_register_rtx_p). We need WB to be true to accept
9412 PRE_INC and POST_DEC. */
9413 if (GET_CODE (ind) == POST_INC
9414 || GET_CODE (ind) == PRE_DEC
9416 && (GET_CODE (ind) == PRE_INC
9417 || GET_CODE (ind) == POST_DEC)))
9418 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9421 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9422 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9423 && GET_CODE (XEXP (ind, 1)) == PLUS
9424 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9425 ind = XEXP (ind, 1);
9430 if (GET_CODE (ind) == PLUS
9431 && GET_CODE (XEXP (ind, 0)) == REG
9432 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9433 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9434 && INTVAL (XEXP (ind, 1)) > -1024
9435 && INTVAL (XEXP (ind, 1)) < 1024
9436 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9442 /* Return TRUE if OP is a memory operand which we can load or store a vector
9443 to/from. TYPE is one of the following values:
0 - Vector load/store (vldr)
9445 1 - Core registers (ldm)
9446 2 - Element/structure loads (vld1)
9449 neon_vector_mem_operand (rtx op, int type)
9453 /* Reject eliminable registers. */
9454 if (! (reload_in_progress || reload_completed)
9455 && ( reg_mentioned_p (frame_pointer_rtx, op)
9456 || reg_mentioned_p (arg_pointer_rtx, op)
9457 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9458 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9459 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9460 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9463 /* Constants are converted into offsets from labels. */
9464 if (GET_CODE (op) != MEM)
9469 if (reload_completed
9470 && (GET_CODE (ind) == LABEL_REF
9471 || (GET_CODE (ind) == CONST
9472 && GET_CODE (XEXP (ind, 0)) == PLUS
9473 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9474 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9477 /* Match: (mem (reg)). */
9478 if (GET_CODE (ind) == REG)
9479 return arm_address_register_rtx_p (ind, 0);
9481 /* Allow post-increment with Neon registers. */
9482 if ((type != 1 && GET_CODE (ind) == POST_INC)
9483 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9484 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9486 /* FIXME: vld1 allows register post-modify. */
9492 && GET_CODE (ind) == PLUS
9493 && GET_CODE (XEXP (ind, 0)) == REG
9494 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9495 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9496 && INTVAL (XEXP (ind, 1)) > -1024
9497 && INTVAL (XEXP (ind, 1)) < 1016
9498 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
9507 neon_struct_mem_operand (rtx op)
9511 /* Reject eliminable registers. */
9512 if (! (reload_in_progress || reload_completed)
9513 && ( reg_mentioned_p (frame_pointer_rtx, op)
9514 || reg_mentioned_p (arg_pointer_rtx, op)
9515 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9516 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9517 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9518 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9521 /* Constants are converted into offsets from labels. */
9522 if (GET_CODE (op) != MEM)
9527 if (reload_completed
9528 && (GET_CODE (ind) == LABEL_REF
9529 || (GET_CODE (ind) == CONST
9530 && GET_CODE (XEXP (ind, 0)) == PLUS
9531 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9532 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9535 /* Match: (mem (reg)). */
9536 if (GET_CODE (ind) == REG)
9537 return arm_address_register_rtx_p (ind, 0);
9539 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9540 if (GET_CODE (ind) == POST_INC
9541 || GET_CODE (ind) == PRE_DEC)
9542 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9547 /* Return true if X is a register that will be eliminated later on. */
9549 arm_eliminable_register (rtx x)
9551 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9552 || REGNO (x) == ARG_POINTER_REGNUM
9553 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9554 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register is required to reload X to/from
9558 coprocessor registers. Otherwise return NO_REGS. */
9561 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9565 if (!TARGET_NEON_FP16)
9566 return GENERAL_REGS;
9567 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9569 return GENERAL_REGS;
/* The neon move patterns handle all legitimate vector and struct
   memory.  */
9575 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9576 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9577 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9578 || VALID_NEON_STRUCT_MODE (mode)))
9581 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9584 return GENERAL_REGS;
/* Values which must be returned in the most-significant end of the return
   register.  */
9591 arm_return_in_msb (const_tree valtype)
9593 return (TARGET_AAPCS_BASED
9595 && (AGGREGATE_TYPE_P (valtype)
9596 || TREE_CODE (valtype) == COMPLEX_TYPE
9597 || FIXED_POINT_TYPE_P (valtype)));
9600 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
Used by the Cirrus Maverick code which has to work around
a hardware bug triggered by such instructions.  */
9604 arm_memory_load_p (rtx insn)
rtx body, lhs, rhs;
9608 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9611 body = PATTERN (insn);
9613 if (GET_CODE (body) != SET)
9616 lhs = XEXP (body, 0);
9617 rhs = XEXP (body, 1);
9619 lhs = REG_OR_SUBREG_RTX (lhs);
9621 /* If the destination is not a general purpose
9622 register we do not have to worry. */
9623 if (GET_CODE (lhs) != REG
9624 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9627 /* As well as loads from memory we also have to react
9628 to loads of invalid constants which will be turned
9629 into loads from the minipool. */
9630 return (GET_CODE (rhs) == MEM
9631 || GET_CODE (rhs) == SYMBOL_REF
9632 || note_invalid_constants (insn, -1, false));
9635 /* Return TRUE if INSN is a Cirrus instruction. */
9637 arm_cirrus_insn_p (rtx insn)
9639 enum attr_cirrus attr;
9641 /* get_attr cannot accept USE or CLOBBER. */
9643 || GET_CODE (insn) != INSN
9644 || GET_CODE (PATTERN (insn)) == USE
9645 || GET_CODE (PATTERN (insn)) == CLOBBER)
9648 attr = get_attr_cirrus (insn);
9650 return attr != CIRRUS_NOT;
9653 /* Cirrus reorg for invalid instruction combinations. */
9655 cirrus_reorg (rtx first)
9657 enum attr_cirrus attr;
9658 rtx body = PATTERN (first);
9662 /* Any branch must be followed by 2 non Cirrus instructions. */
9663 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9666 t = next_nonnote_insn (first);
9668 if (arm_cirrus_insn_p (t))
9671 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9675 emit_insn_after (gen_nop (), first);
9680 /* (float (blah)) is in parallel with a clobber. */
9681 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9682 body = XVECEXP (body, 0, 0);
9684 if (GET_CODE (body) == SET)
9686 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9688 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9689 be followed by a non Cirrus insn. */
9690 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9692 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9693 emit_insn_after (gen_nop (), first);
9697 else if (arm_memory_load_p (first))
9699 unsigned int arm_regno;
9701 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9702 ldr/cfmv64hr combination where the Rd field is the same
in both instructions must be split with a non Cirrus
insn.  */
9710 /* Get Arm register number for ldr insn. */
9711 if (GET_CODE (lhs) == REG)
9712 arm_regno = REGNO (lhs);
9715 gcc_assert (GET_CODE (rhs) == REG);
9716 arm_regno = REGNO (rhs);
9720 first = next_nonnote_insn (first);
9722 if (! arm_cirrus_insn_p (first))
9725 body = PATTERN (first);
9727 /* (float (blah)) is in parallel with a clobber. */
9728 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9729 body = XVECEXP (body, 0, 0);
9731 if (GET_CODE (body) == FLOAT)
9732 body = XEXP (body, 0);
9734 if (get_attr_cirrus (first) == CIRRUS_MOVE
9735 && GET_CODE (XEXP (body, 1)) == REG
9736 && arm_regno == REGNO (XEXP (body, 1)))
9737 emit_insn_after (gen_nop (), first);
9743 /* get_attr cannot accept USE or CLOBBER. */
9745 || GET_CODE (first) != INSN
9746 || GET_CODE (PATTERN (first)) == USE
9747 || GET_CODE (PATTERN (first)) == CLOBBER)
9750 attr = get_attr_cirrus (first);
9752 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9753 must be followed by a non-coprocessor instruction. */
9754 if (attr == CIRRUS_COMPARE)
9758 t = next_nonnote_insn (first);
9760 if (arm_cirrus_insn_p (t))
9763 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9767 emit_insn_after (gen_nop (), first);
9773 /* Return TRUE if X references a SYMBOL_REF. */
9775 symbol_mentioned_p (rtx x)
9780 if (GET_CODE (x) == SYMBOL_REF)
9783 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9784 are constant offsets, not symbols. */
9785 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9788 fmt = GET_RTX_FORMAT (GET_CODE (x));
9790 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9796 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9797 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9800 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9807 /* Return TRUE if X references a LABEL_REF. */
9809 label_mentioned_p (rtx x)
9814 if (GET_CODE (x) == LABEL_REF)
9817 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9818 instruction, but they are constant offsets, not symbols. */
9819 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9822 fmt = GET_RTX_FORMAT (GET_CODE (x));
9823 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9829 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9830 if (label_mentioned_p (XVECEXP (x, i, j)))
9833 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9841 tls_mentioned_p (rtx x)
9843 switch (GET_CODE (x))
9846 return tls_mentioned_p (XEXP (x, 0));
9849 if (XINT (x, 1) == UNSPEC_TLS)
9857 /* Must not copy any rtx that uses a pc-relative address. */
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9862 if (GET_CODE (*x) == UNSPEC
9863 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9869 arm_cannot_copy_insn_p (rtx insn)
/* The tls call insn cannot be copied, as it is paired with a data
   relocation.  */
9873 if (recog_memoized (insn) == CODE_FOR_tlscall)
9876 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9882 enum rtx_code code = GET_CODE (x);
9899 /* Return 1 if memory locations are adjacent. */
9901 adjacent_mem_locations (rtx a, rtx b)
9903 /* We don't guarantee to preserve the order of these memory refs. */
9904 if (volatile_refs_p (a) || volatile_refs_p (b))
9907 if ((GET_CODE (XEXP (a, 0)) == REG
9908 || (GET_CODE (XEXP (a, 0)) == PLUS
9909 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9910 && (GET_CODE (XEXP (b, 0)) == REG
9911 || (GET_CODE (XEXP (b, 0)) == PLUS
9912 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9914 HOST_WIDE_INT val0 = 0, val1 = 0;
9918 if (GET_CODE (XEXP (a, 0)) == PLUS)
9920 reg0 = XEXP (XEXP (a, 0), 0);
9921 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9926 if (GET_CODE (XEXP (b, 0)) == PLUS)
9928 reg1 = XEXP (XEXP (b, 0), 0);
9929 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9934 /* Don't accept any offset that will require multiple
9935 instructions to handle, since this would cause the
9936 arith_adjacentmem pattern to output an overlong sequence. */
9937 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9940 /* Don't allow an eliminable register: register elimination can make
9941 the offset too large. */
9942 if (arm_eliminable_register (reg0))
9945 val_diff = val1 - val0;
9949 /* If the target has load delay slots, then there's no benefit
9950 to using an ldm instruction unless the offset is zero and
9951 we are optimizing for size. */
9952 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9953 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9954 && (val_diff == 4 || val_diff == -4));
9957 return ((REGNO (reg0) == REGNO (reg1))
9958 && (val_diff == 4 || val_diff == -4));
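/* For instance, "ldr r0, [r4, #4]" and "ldr r1, [r4, #8]" are adjacent
   (same base register, offsets differing by exactly 4), whereas a
   frame-pointer-based pair is rejected above because register elimination
   could push the offsets out of range.  */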
9964 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9965 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9966 instruction. ADD_OFFSET is nonzero if the base address register needs
9967 to be modified with an add instruction before we can use it. */
9970 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9971 int nops, HOST_WIDE_INT add_offset)
9973 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9974 if the offset isn't small enough. The reason 2 ldrs are faster
9975 is because these ARMs are able to do more than one cache access
9976 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9977 whilst the ARM8 has a double bandwidth cache. This means that
9978 these cores can do both an instruction fetch and a data fetch in
9979 a single cycle, so the trick of calculating the address into a
9980 scratch register (one of the result regs) and then doing a load
9981 multiple actually becomes slower (and no smaller in code size).
9982 That is the transformation
9984 ldr rd1, [rbase + offset]
9985 ldr rd2, [rbase + offset + 4]
9989 add rd1, rbase, offset
9990 ldmia rd1, {rd1, rd2}
9992 produces worse code -- '3 cycles + any stalls on rd2' instead of
9993 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9994 access per cycle, the first sequence could never complete in less
9995 than 6 cycles, whereas the ldm sequence would only take 5 and
9996 would make better use of sequential accesses if not hitting the
9999 We cheat here and test 'arm_ld_sched' which we currently know to
10000 only be true for the ARM8, ARM9 and StrongARM. If this ever
10001 changes, then the test below needs to be reworked. */
10002 if (nops == 2 && arm_ld_sched && add_offset != 0)
10005 /* XScale has load-store double instructions, but they have stricter
alignment requirements than load-store multiple, so we cannot
use them.
10009 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10010 the pipeline until completion.
An ldr instruction takes 1-3 cycles, but does not block the
pipeline.
10027 Best case ldr will always win. However, the more ldr instructions
10028 we issue, the less likely we are to be able to schedule them well.
10029 Using ldr instructions also increases code size.
10031 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10032 for counts of 3 or 4 regs. */
10033 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10038 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10039 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10040 an array ORDER which describes the sequence to use when accessing the
10041 offsets that produces an ascending order. In this sequence, each
10042 offset must be larger by exactly 4 than the previous one. ORDER[0]
10043 must have been filled in with the lowest offset by the caller.
10044 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10045 we use to verify that ORDER produces an ascending order of registers.
Return true if it was possible to construct such an order, false if
not.  */
10050 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10051 int *unsorted_regs)
10054 for (i = 1; i < nops; i++)
10058 order[i] = order[i - 1];
10059 for (j = 0; j < nops; j++)
10060 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10062 /* We must find exactly one offset that is higher than the
10063 previous one by 4. */
10064 if (order[i] != order[i - 1])
10068 if (order[i] == order[i - 1])
10070 /* The register numbers must be ascending. */
10071 if (unsorted_regs != NULL
10072 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
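/* A quick example of the ordering built above: given UNSORTED_OFFSETS
   {8, 0, 12, 4} and ORDER[0] = 1 (the lowest offset), the loop fills
   ORDER with {1, 3, 0, 2}, i.e. offsets 0, 4, 8, 12; any gap or duplicate
   offset makes the function fail.  */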
10078 /* Used to determine in a peephole whether a sequence of load
10079 instructions can be changed into a load-multiple instruction.
10080 NOPS is the number of separate load instructions we are examining. The
10081 first NOPS entries in OPERANDS are the destination registers, the
10082 next NOPS entries are memory operands. If this function is
10083 successful, *BASE is set to the common base register of the memory
10084 accesses; *LOAD_OFFSET is set to the first memory location's offset
10085 from that base register.
10086 REGS is an array filled in with the destination register numbers.
SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10088 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10089 the sequence of registers in REGS matches the loads from ascending memory
10090 locations, and the function verifies that the register numbers are
10091 themselves ascending. If CHECK_REGS is false, the register numbers
10092 are stored in the order they are found in the operands. */
10094 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10095 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10097 int unsorted_regs[MAX_LDM_STM_OPS];
10098 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10099 int order[MAX_LDM_STM_OPS];
10100 rtx base_reg_rtx = NULL;
10104 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10105 easily extended if required. */
10106 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10108 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10110 /* Loop over the operands and check that the memory references are
10111 suitable (i.e. immediate offsets from the same base register). At
10112 the same time, extract the target register, and the memory
10114 for (i = 0; i < nops; i++)
10119 /* Convert a subreg of a mem into the mem itself. */
10120 if (GET_CODE (operands[nops + i]) == SUBREG)
10121 operands[nops + i] = alter_subreg (operands + (nops + i));
10123 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10125 /* Don't reorder volatile memory references; it doesn't seem worth
10126 looking for the case where the order is ok anyway. */
10127 if (MEM_VOLATILE_P (operands[nops + i]))
10130 offset = const0_rtx;
10132 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10133 || (GET_CODE (reg) == SUBREG
10134 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10135 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10136 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10138 || (GET_CODE (reg) == SUBREG
10139 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10140 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10145 base_reg = REGNO (reg);
10146 base_reg_rtx = reg;
10147 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10150 else if (base_reg != (int) REGNO (reg))
10151 /* Not addressed from the same base register. */
10154 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10155 ? REGNO (operands[i])
10156 : REGNO (SUBREG_REG (operands[i])));
10158 /* If it isn't an integer register, or if it overwrites the
10159 base register but isn't the last insn in the list, then
10160 we can't do this. */
10161 if (unsorted_regs[i] < 0
10162 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10163 || unsorted_regs[i] > 14
10164 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10167 unsorted_offsets[i] = INTVAL (offset);
10168 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10172 /* Not a suitable memory address. */
10176 /* All the useful information has now been extracted from the
10177 operands into unsorted_regs and unsorted_offsets; additionally,
10178 order[0] has been set to the lowest offset in the list. Sort
10179 the offsets into order, verifying that they are adjacent, and
10180 check that the register numbers are ascending. */
10181 if (!compute_offset_order (nops, unsorted_offsets, order,
10182 check_regs ? unsorted_regs : NULL))
10186 memcpy (saved_order, order, sizeof order);
10192 for (i = 0; i < nops; i++)
10193 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10195 *load_offset = unsorted_offsets[order[0]];
10199 && !peep2_reg_dead_p (nops, base_reg_rtx))
10202 if (unsorted_offsets[order[0]] == 0)
10203 ldm_case = 1; /* ldmia */
10204 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10205 ldm_case = 2; /* ldmib */
10206 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10207 ldm_case = 3; /* ldmda */
10208 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10209 ldm_case = 4; /* ldmdb */
10210 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10211 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10216 if (!multiple_operation_profitable_p (false, nops,
10218 ? unsorted_offsets[order[0]] : 0))
10224 /* Used to determine in a peephole whether a sequence of store instructions can
10225 be changed into a store-multiple instruction.
10226 NOPS is the number of separate store instructions we are examining.
NOPS_TOTAL is the total number of instructions recognized by the peephole
pattern.
10229 The first NOPS entries in OPERANDS are the source registers, the next
10230 NOPS entries are memory operands. If this function is successful, *BASE is
10231 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10232 to the first memory location's offset from that base register. REGS is an
10233 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10234 likewise filled with the corresponding rtx's.
SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10236 numbers to an ascending order of stores.
10237 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10238 from ascending memory locations, and the function verifies that the register
10239 numbers are themselves ascending. If CHECK_REGS is false, the register
10240 numbers are stored in the order they are found in the operands. */
10242 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10243 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10244 HOST_WIDE_INT *load_offset, bool check_regs)
10246 int unsorted_regs[MAX_LDM_STM_OPS];
10247 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10248 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10249 int order[MAX_LDM_STM_OPS];
10251 rtx base_reg_rtx = NULL;
/* Write-back of the base register is currently only supported for Thumb-1.  */
10255 int base_writeback = TARGET_THUMB1;
10257 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10258 easily extended if required. */
10259 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10261 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10263 /* Loop over the operands and check that the memory references are
10264 suitable (i.e. immediate offsets from the same base register). At
10265 the same time, extract the target register, and the memory
10267 for (i = 0; i < nops; i++)
10272 /* Convert a subreg of a mem into the mem itself. */
10273 if (GET_CODE (operands[nops + i]) == SUBREG)
10274 operands[nops + i] = alter_subreg (operands + (nops + i));
10276 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10278 /* Don't reorder volatile memory references; it doesn't seem worth
10279 looking for the case where the order is ok anyway. */
10280 if (MEM_VOLATILE_P (operands[nops + i]))
10283 offset = const0_rtx;
10285 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10286 || (GET_CODE (reg) == SUBREG
10287 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10288 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10289 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10291 || (GET_CODE (reg) == SUBREG
10292 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10293 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10296 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10297 ? operands[i] : SUBREG_REG (operands[i]));
10298 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10302 base_reg = REGNO (reg);
10303 base_reg_rtx = reg;
10304 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10307 else if (base_reg != (int) REGNO (reg))
10308 /* Not addressed from the same base register. */
10311 /* If it isn't an integer register, then we can't do this. */
10312 if (unsorted_regs[i] < 0
10313 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10314 /* The effects are unpredictable if the base register is
10315 both updated and stored. */
10316 || (base_writeback && unsorted_regs[i] == base_reg)
10317 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10318 || unsorted_regs[i] > 14)
10321 unsorted_offsets[i] = INTVAL (offset);
10322 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10326 /* Not a suitable memory address. */
10330 /* All the useful information has now been extracted from the
10331 operands into unsorted_regs and unsorted_offsets; additionally,
10332 order[0] has been set to the lowest offset in the list. Sort
10333 the offsets into order, verifying that they are adjacent, and
10334 check that the register numbers are ascending. */
10335 if (!compute_offset_order (nops, unsorted_offsets, order,
10336 check_regs ? unsorted_regs : NULL))
10340 memcpy (saved_order, order, sizeof order);
10346 for (i = 0; i < nops; i++)
10348 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10350 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10353 *load_offset = unsorted_offsets[order[0]];
10357 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10360 if (unsorted_offsets[order[0]] == 0)
10361 stm_case = 1; /* stmia */
10362 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10363 stm_case = 2; /* stmib */
10364 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10365 stm_case = 3; /* stmda */
10366 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10367 stm_case = 4; /* stmdb */
10371 if (!multiple_operation_profitable_p (false, nops, 0))
10377 /* Routines for use in generating RTL. */
10379 /* Generate a load-multiple instruction. COUNT is the number of loads in
10380 the instruction; REGS and MEMS are arrays containing the operands.
10381 BASEREG is the base register to be used in addressing the memory operands.
10382 WBACK_OFFSET is nonzero if the instruction should update the base
10386 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10387 HOST_WIDE_INT wback_offset)
10392 if (!multiple_operation_profitable_p (false, count, 0))
10398 for (i = 0; i < count; i++)
10399 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10401 if (wback_offset != 0)
10402 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10404 seq = get_insns ();
10410 result = gen_rtx_PARALLEL (VOIDmode,
10411 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10412 if (wback_offset != 0)
10414 XVECEXP (result, 0, 0)
10415 = gen_rtx_SET (VOIDmode, basereg,
10416 plus_constant (basereg, wback_offset));
10421 for (j = 0; i < count; i++, j++)
10422 XVECEXP (result, 0, i)
10423 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
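/* The emitted pattern is, as a sketch (for COUNT == 2 with a write-back
   of 8):

     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
                (set (reg r0) (mem ...))
                (set (reg r1) (mem ...))])

   which the load-multiple patterns in arm.md match directly.  */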
10428 /* Generate a store-multiple instruction. COUNT is the number of stores in
10429 the instruction; REGS and MEMS are arrays containing the operands.
10430 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register.  */
10435 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10436 HOST_WIDE_INT wback_offset)
10441 if (GET_CODE (basereg) == PLUS)
10442 basereg = XEXP (basereg, 0);
10444 if (!multiple_operation_profitable_p (false, count, 0))
10450 for (i = 0; i < count; i++)
10451 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10453 if (wback_offset != 0)
10454 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10456 seq = get_insns ();
10462 result = gen_rtx_PARALLEL (VOIDmode,
10463 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10464 if (wback_offset != 0)
10466 XVECEXP (result, 0, 0)
10467 = gen_rtx_SET (VOIDmode, basereg,
10468 plus_constant (basereg, wback_offset));
10473 for (j = 0; i < count; i++, j++)
10474 XVECEXP (result, 0, i)
10475 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10480 /* Generate either a load-multiple or a store-multiple instruction. This
10481 function can be used in situations where we can start with a single MEM
10482 rtx and adjust its address upwards.
10483 COUNT is the number of operations in the instruction, not counting a
possible update of the base register.  REGS is an array containing the
register numbers to be used in the instruction.
10486 BASEREG is the base register to be used in addressing the memory operands,
10487 which are constructed from BASEMEM.
10488 WRITE_BACK specifies whether the generated instruction should include an
10489 update of the base register.
10490 OFFSETP is used to pass an offset to and from this function; this offset
10491 is not used when constructing the address (instead BASEMEM should have an
10492 appropriate offset in its address), it is used only for setting
MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
10496 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10497 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10499 rtx mems[MAX_LDM_STM_OPS];
10500 HOST_WIDE_INT offset = *offsetp;
10503 gcc_assert (count <= MAX_LDM_STM_OPS);
10505 if (GET_CODE (basereg) == PLUS)
10506 basereg = XEXP (basereg, 0);
10508 for (i = 0; i < count; i++)
10510 rtx addr = plus_constant (basereg, i * 4);
10511 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10519 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10520 write_back ? 4 * count : 0);
10522 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10523 write_back ? 4 * count : 0);
10527 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10528 rtx basemem, HOST_WIDE_INT *offsetp)
10530 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10535 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10536 rtx basemem, HOST_WIDE_INT *offsetp)
10538 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10542 /* Called from a peephole2 expander to turn a sequence of loads into an
10543 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10544 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
is true if we can reorder the registers because they are used commutatively
subsequently.
10547 Returns true iff we could generate a new instruction. */
10550 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10552 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10553 rtx mems[MAX_LDM_STM_OPS];
10554 int i, j, base_reg;
10556 HOST_WIDE_INT offset;
10557 int write_back = FALSE;
10561 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10562 &base_reg, &offset, !sort_regs);
10568 for (i = 0; i < nops - 1; i++)
10569 for (j = i + 1; j < nops; j++)
10570 if (regs[i] > regs[j])
10576 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10580 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10581 gcc_assert (ldm_case == 1 || ldm_case == 5);
10587 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10588 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10590 if (!TARGET_THUMB1)
10592 base_reg = regs[0];
10593 base_reg_rtx = newbase;
10597 for (i = 0; i < nops; i++)
10599 addr = plus_constant (base_reg_rtx, offset + i * 4);
10600 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10603 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10604 write_back ? offset + i * 4 : 0));
10608 /* Called from a peephole2 expander to turn a sequence of stores into an
10609 STM instruction. OPERANDS are the operands found by the peephole matcher;
10610 NOPS indicates how many separate stores we are trying to combine.
10611 Returns true iff we could generate a new instruction. */
10614 gen_stm_seq (rtx *operands, int nops)
10617 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10618 rtx mems[MAX_LDM_STM_OPS];
10621 HOST_WIDE_INT offset;
10622 int write_back = FALSE;
10625 bool base_reg_dies;
10627 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10628 mem_order, &base_reg, &offset, true);
10633 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10635 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10638 gcc_assert (base_reg_dies);
10644 gcc_assert (base_reg_dies);
10645 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10649 addr = plus_constant (base_reg_rtx, offset);
10651 for (i = 0; i < nops; i++)
10653 addr = plus_constant (base_reg_rtx, offset + i * 4);
10654 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10657 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10658 write_back ? offset + i * 4 : 0));
10662 /* Called from a peephole2 expander to turn a sequence of stores that are
10663 preceded by constant loads into an STM instruction. OPERANDS are the
10664 operands found by the peephole matcher; NOPS indicates how many
10665 separate stores we are trying to combine; there are 2 * NOPS
10666 instructions in the peephole.
10667 Returns true iff we could generate a new instruction. */
10670 gen_const_stm_seq (rtx *operands, int nops)
10672 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10673 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10674 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10675 rtx mems[MAX_LDM_STM_OPS];
10678 HOST_WIDE_INT offset;
10679 int write_back = FALSE;
10682 bool base_reg_dies;
10684 HARD_REG_SET allocated;
10686 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10687 mem_order, &base_reg, &offset, false);
10692 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
/* If the same register is used more than once, try to find a free
   register.  */
10696 CLEAR_HARD_REG_SET (allocated);
10697 for (i = 0; i < nops; i++)
10699 for (j = i + 1; j < nops; j++)
10700 if (regs[i] == regs[j])
10702 rtx t = peep2_find_free_register (0, nops * 2,
10703 TARGET_THUMB1 ? "l" : "r",
10704 SImode, &allocated);
10708 regs[i] = REGNO (t);
/* Compute an ordering that maps the register numbers to an ascending
   sequence.  */
10715 for (i = 0; i < nops; i++)
10716 if (regs[i] < regs[reg_order[0]])
10719 for (i = 1; i < nops; i++)
10721 int this_order = reg_order[i - 1];
10722 for (j = 0; j < nops; j++)
10723 if (regs[j] > regs[reg_order[i - 1]]
10724 && (this_order == reg_order[i - 1]
10725 || regs[j] < regs[this_order]))
10727 reg_order[i] = this_order;
10730 /* Ensure that registers that must be live after the instruction end
10731 up with the correct value. */
10732 for (i = 0; i < nops; i++)
10734 int this_order = reg_order[i];
10735 if ((this_order != mem_order[i]
10736 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10737 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10741 /* Load the constants. */
10742 for (i = 0; i < nops; i++)
10744 rtx op = operands[2 * nops + mem_order[i]];
10745 sorted_regs[i] = regs[reg_order[i]];
10746 emit_move_insn (reg_rtxs[reg_order[i]], op);
10749 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10751 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10754 gcc_assert (base_reg_dies);
10760 gcc_assert (base_reg_dies);
10761 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10765 addr = plus_constant (base_reg_rtx, offset);
10767 for (i = 0; i < nops; i++)
10769 addr = plus_constant (base_reg_rtx, offset + i * 4);
10770 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10773 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10774 write_back ? offset + i * 4 : 0));
10778 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10779 unaligned copies on processors which support unaligned semantics for those
10780 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10781 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10782 An interleave factor of 1 (the minimum) will perform no interleaving.
10783 Load/store multiple are used for aligned addresses where possible. */
10786 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10787 HOST_WIDE_INT length,
10788 unsigned int interleave_factor)
10790 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10791 int *regnos = XALLOCAVEC (int, interleave_factor);
10792 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10793 HOST_WIDE_INT i, j;
10794 HOST_WIDE_INT remaining = length, words;
10795 rtx halfword_tmp = NULL, byte_tmp = NULL;
10797 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10798 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10799 HOST_WIDE_INT srcoffset, dstoffset;
10800 HOST_WIDE_INT src_autoinc, dst_autoinc;
10803 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10805 /* Use hard registers if we have aligned source or destination so we can use
10806 load/store multiple with contiguous registers. */
10807 if (dst_aligned || src_aligned)
10808 for (i = 0; i < interleave_factor; i++)
10809 regs[i] = gen_rtx_REG (SImode, i);
10811 for (i = 0; i < interleave_factor; i++)
10812 regs[i] = gen_reg_rtx (SImode);
10814 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10815 src = copy_addr_to_reg (XEXP (srcbase, 0));
10817 srcoffset = dstoffset = 0;
10819 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10820 For copying the last bytes we want to subtract this offset again. */
10821 src_autoinc = dst_autoinc = 0;
10823 for (i = 0; i < interleave_factor; i++)
10826 /* Copy BLOCK_SIZE_BYTES chunks. */
10828 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10831 if (src_aligned && interleave_factor > 1)
10833 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10834 TRUE, srcbase, &srcoffset));
10835 src_autoinc += UNITS_PER_WORD * interleave_factor;
10839 for (j = 0; j < interleave_factor; j++)
10841 addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
10843 mem = adjust_automodify_address (srcbase, SImode, addr,
10844 srcoffset + j * UNITS_PER_WORD);
10845 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10847 srcoffset += block_size_bytes;
10851 if (dst_aligned && interleave_factor > 1)
10853 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10854 TRUE, dstbase, &dstoffset));
10855 dst_autoinc += UNITS_PER_WORD * interleave_factor;
10859 for (j = 0; j < interleave_factor; j++)
10861 addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
10863 mem = adjust_automodify_address (dstbase, SImode, addr,
10864 dstoffset + j * UNITS_PER_WORD);
10865 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10867 dstoffset += block_size_bytes;
10870 remaining -= block_size_bytes;
10873 /* Copy any whole words left (note these aren't interleaved with any
10874 subsequent halfword/byte load/stores in the interests of simplicity). */
10876 words = remaining / UNITS_PER_WORD;
10878 gcc_assert (words < interleave_factor);
10880 if (src_aligned && words > 1)
10882 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
10884 src_autoinc += UNITS_PER_WORD * words;
10888 for (j = 0; j < words; j++)
10890 addr = plus_constant (src,
10891 srcoffset + j * UNITS_PER_WORD - src_autoinc);
10892 mem = adjust_automodify_address (srcbase, SImode, addr,
10893 srcoffset + j * UNITS_PER_WORD);
10894 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10896 srcoffset += words * UNITS_PER_WORD;
10899 if (dst_aligned && words > 1)
10901 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
10903 dst_autoinc += words * UNITS_PER_WORD;
10907 for (j = 0; j < words; j++)
10909 addr = plus_constant (dst,
10910 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
10911 mem = adjust_automodify_address (dstbase, SImode, addr,
10912 dstoffset + j * UNITS_PER_WORD);
10913 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10915 dstoffset += words * UNITS_PER_WORD;
10918 remaining -= words * UNITS_PER_WORD;
10920 gcc_assert (remaining < 4);
10922 /* Copy a halfword if necessary. */
10924 if (remaining >= 2)
10926 halfword_tmp = gen_reg_rtx (SImode);
10928 addr = plus_constant (src, srcoffset - src_autoinc);
10929 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
10930 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
10932 /* Either write out immediately, or delay until we've loaded the last
10933 byte, depending on interleave factor. */
10934 if (interleave_factor == 1)
10936 addr = plus_constant (dst, dstoffset - dst_autoinc);
10937 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
10938 emit_insn (gen_unaligned_storehi (mem,
10939 gen_lowpart (HImode, halfword_tmp)));
10940 halfword_tmp = NULL;
10948 gcc_assert (remaining < 2);
10950 /* Copy last byte. */
10952 if ((remaining & 1) != 0)
10954 byte_tmp = gen_reg_rtx (SImode);
10956 addr = plus_constant (src, srcoffset - src_autoinc);
10957 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
10958 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
10960 if (interleave_factor == 1)
10962 addr = plus_constant (dst, dstoffset - dst_autoinc);
10963 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
10964 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
10965 byte_tmp = NULL;
10973 /* Store last halfword if we haven't done so already. */
10975 if (halfword_tmp)
10977 addr = plus_constant (dst, dstoffset - dst_autoinc);
10978 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
10979 emit_insn (gen_unaligned_storehi (mem,
10980 gen_lowpart (HImode, halfword_tmp)));
10984 /* Likewise for last byte. */
10986 if (byte_tmp)
10988 addr = plus_constant (dst, dstoffset - dst_autoinc);
10989 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
10990 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
10994 gcc_assert (remaining == 0 && srcoffset == dstoffset);
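/* Illustrative sketch, not part of the original file: a plain-C model of
   the straight-line copy sequence expanded above -- whole words first
   (grouping by INTERLEAVE_FACTOR and the ldm/stm alternatives are not
   modelled), then an optional trailing halfword and byte.  Assumes
   unaligned 32-bit accesses are permitted, as the unaligned_loadsi /
   unaligned_storesi patterns require.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
model_unaligned_straight (uint8_t *dst, const uint8_t *src, int length)
{
  int offset = 0;

  for (; offset + 4 <= length; offset += 4)	/* whole-word copies */
    {
      uint32_t w;
      memcpy (&w, src + offset, 4);		/* unaligned_loadsi */
      memcpy (dst + offset, &w, 4);		/* unaligned_storesi */
    }
  if (length - offset >= 2)			/* trailing halfword */
    {
      uint16_t h;
      memcpy (&h, src + offset, 2);		/* unaligned_loadhiu */
      memcpy (dst + offset, &h, 2);		/* unaligned_storehi */
      offset += 2;
    }
  if (length - offset == 1)			/* trailing byte */
    dst[offset] = src[offset];
}
#endif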
10997 /* From mips_adjust_block_mem:
10999 Helper function for doing a loop-based block operation on memory
11000 reference MEM. Each iteration of the loop will operate on LENGTH
11001 bytes of MEM.
11003 Create a new base register for use within the loop and point it to
11004 the start of MEM. Create a new memory reference that uses this
11005 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11008 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11009 rtx *loop_mem)
11011 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11013 /* Although the new mem does not refer to a known location,
11014 it does keep up to LENGTH bytes of alignment. */
11015 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11016 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11019 /* From mips_block_move_loop:
11021 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11022 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11023 the memory regions do not overlap. */
11026 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11027 unsigned int interleave_factor,
11028 HOST_WIDE_INT bytes_per_iter)
11030 rtx label, src_reg, dest_reg, final_src, test;
11031 HOST_WIDE_INT leftover;
11033 leftover = length % bytes_per_iter;
11034 length -= leftover;
11036 /* Create registers and memory references for use within the loop. */
11037 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11038 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11040 /* Calculate the value that SRC_REG should have after the last iteration of
11041 the loop. */
11042 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11043 0, 0, OPTAB_WIDEN);
11045 /* Emit the start of the loop. */
11046 label = gen_label_rtx ();
11047 emit_label (label);
11049 /* Emit the loop body. */
11050 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11051 interleave_factor);
11053 /* Move on to the next block. */
11054 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11055 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11057 /* Emit the loop condition. */
11058 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11059 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11061 /* Mop up any left-over bytes. */
11062 if (leftover)
11063 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
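/* Illustrative sketch, not part of the original file: the control
   structure built above expressed as C.  LENGTH >= BYTES_PER_ITER is
   assumed, exactly as the comment before the function requires;
   copy_block is a hypothetical stand-in for the straight-line copier.  */
#if 0
static void copy_block (char *dest, const char *src, long nbytes);

static void
model_move_loop (char *dest, const char *src, long length,
		 long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const char *final_src = src + (length - leftover);	/* expand_simple_binop */

  do						/* emit_label (label) */
    {
      copy_block (dest, src, bytes_per_iter);	/* the loop body */
      src += bytes_per_iter;			/* move on to the next block */
      dest += bytes_per_iter;
    }
  while (src != final_src);			/* gen_cbranchsi4 (NE) */

  if (leftover)					/* mop up left-over bytes */
    copy_block (dest, src, leftover);
}
#endif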
11066 /* Emit a block move when either the source or destination is unaligned (not
11067 aligned to a four-byte boundary). This may need further tuning depending on
11068 core type, optimize_size setting, etc. */
11071 arm_movmemqi_unaligned (rtx *operands)
11073 HOST_WIDE_INT length = INTVAL (operands[2]);
11077 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11078 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11079 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11080 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11081 or dst_aligned though: allow more interleaving in those cases since the
11082 resulting code can be smaller. */
11083 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11084 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11087 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11088 interleave_factor, bytes_per_iter);
11090 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11091 interleave_factor);
11095 /* Note that the loop created by arm_block_move_unaligned_loop may be
11096 subject to loop unrolling, which makes tuning this condition a little
11097 awkward. */
11099 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11101 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
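/* Illustrative summary, reconstructed from the code above (the length
   threshold that picks the loop over the fully straight-line form is
   omitted here): how the tuning parameters are chosen.  */
#if 0
struct movmem_params { unsigned int interleave; long bytes_per_iter; };

static struct movmem_params
model_choose_params (int optimizing_for_size, int either_side_word_aligned)
{
  struct movmem_params p;

  if (optimizing_for_size)
    {
      /* ldm/stm keep the code small when one side is word-aligned, so
	 allow more interleaving in that case.  */
      p.interleave = either_side_word_aligned ? 2 : 1;
      p.bytes_per_iter = either_side_word_aligned ? 8 : 4;
    }
  else
    {
      p.interleave = 4;		/* four registers per iteration */
      p.bytes_per_iter = 16;
    }
  return p;
}
#endif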
11108 arm_gen_movmemqi (rtx *operands)
11110 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11111 HOST_WIDE_INT srcoffset, dstoffset;
11113 rtx src, dst, srcbase, dstbase;
11114 rtx part_bytes_reg = NULL;
11117 if (GET_CODE (operands[2]) != CONST_INT
11118 || GET_CODE (operands[3]) != CONST_INT
11119 || INTVAL (operands[2]) > 64)
11120 return 0;
11122 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11123 return arm_movmemqi_unaligned (operands);
11125 if (INTVAL (operands[3]) & 3)
11126 return 0;
11128 dstbase = operands[0];
11129 srcbase = operands[1];
11131 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11132 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11134 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11135 out_words_to_go = INTVAL (operands[2]) / 4;
11136 last_bytes = INTVAL (operands[2]) & 3;
11137 dstoffset = srcoffset = 0;
11139 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11140 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11142 for (i = 0; in_words_to_go >= 2; i += 4)
11144 if (in_words_to_go > 4)
11145 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11146 TRUE, srcbase, &srcoffset));
11148 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11149 src, FALSE, srcbase,
11152 if (out_words_to_go)
11154 if (out_words_to_go > 4)
11155 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11156 TRUE, dstbase, &dstoffset));
11157 else if (out_words_to_go != 1)
11158 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11159 out_words_to_go, dst,
11162 dstbase, &dstoffset));
11165 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11166 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11167 if (last_bytes != 0)
11169 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11175 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11176 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11179 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11180 if (out_words_to_go)
11184 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11185 sreg = copy_to_reg (mem);
11187 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11188 emit_move_insn (mem, sreg);
11189 in_words_to_go--;
11191 gcc_assert (!in_words_to_go); /* Sanity check */
11194 if (in_words_to_go)
11196 gcc_assert (in_words_to_go > 0);
11198 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11199 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11202 gcc_assert (!last_bytes || part_bytes_reg);
11204 if (BYTES_BIG_ENDIAN && last_bytes)
11206 rtx tmp = gen_reg_rtx (SImode);
11208 /* The bytes we want are in the top end of the word. */
11209 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11210 GEN_INT (8 * (4 - last_bytes))));
11211 part_bytes_reg = tmp;
11213 while (last_bytes)
11215 mem = adjust_automodify_address (dstbase, QImode,
11216 plus_constant (dst, last_bytes - 1),
11217 dstoffset + last_bytes - 1);
11218 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11220 if (--last_bytes)
11222 tmp = gen_reg_rtx (SImode);
11223 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11224 part_bytes_reg = tmp;
11231 if (last_bytes > 1)
11233 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11234 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11238 rtx tmp = gen_reg_rtx (SImode);
11239 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11240 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11241 part_bytes_reg = tmp;
11248 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11249 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11253 return 1;
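/* Worked example (illustrative): for a 23-byte copy the counts above come
   out as in_words_to_go = ARM_NUM_INTS (23) = 6 (rounding up to whole
   words), out_words_to_go = 23 / 4 = 5 and last_bytes = 23 & 3 = 3; five
   whole words are stored directly and the sixth loaded word supplies the
   trailing three bytes through part_bytes_reg.  */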
11256 /* Select a dominance comparison mode if possible for a test of the general
11257 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11258 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11259 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11260 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11261 In all cases OP will be either EQ or NE, but we don't need to know which
11262 here. If we are unable to support a dominance comparison we return
11263 CC mode. This will then fail to match for the RTL expressions that
11264 generate this call. */
11266 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11268 enum rtx_code cond1, cond2;
11269 int swapped = 0;
11271 /* Currently we will probably get the wrong result if the individual
11272 comparisons are not simple. This also ensures that it is safe to
11273 reverse a comparison if necessary. */
11274 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11275 != CCmode)
11276 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11277 != CCmode))
11278 return CCmode;
11280 /* The if_then_else variant of this tests the second condition if the
11281 first passes, but is true if the first fails. Reverse the first
11282 condition to get a true "inclusive-or" expression. */
11283 if (cond_or == DOM_CC_NX_OR_Y)
11284 cond1 = reverse_condition (cond1);
11286 /* If the comparisons are not equal, and one doesn't dominate the other,
11287 then we can't do this. */
11288 if (cond1 != cond2
11289 && !comparison_dominates_p (cond1, cond2)
11290 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11291 return CCmode;
11293 if (swapped)
11295 enum rtx_code temp = cond1;
11296 cond1 = cond2;
11297 cond2 = temp;
11303 if (cond_or == DOM_CC_X_AND_Y)
11308 case EQ: return CC_DEQmode;
11309 case LE: return CC_DLEmode;
11310 case LEU: return CC_DLEUmode;
11311 case GE: return CC_DGEmode;
11312 case GEU: return CC_DGEUmode;
11313 default: gcc_unreachable ();
11317 if (cond_or == DOM_CC_X_AND_Y)
11329 gcc_unreachable ();
11333 if (cond_or == DOM_CC_X_AND_Y)
11345 gcc_unreachable ();
11349 if (cond_or == DOM_CC_X_AND_Y)
11350 return CC_DLTUmode;
11355 return CC_DLTUmode;
11357 return CC_DLEUmode;
11361 gcc_unreachable ();
11365 if (cond_or == DOM_CC_X_AND_Y)
11366 return CC_DGTUmode;
11371 return CC_DGTUmode;
11373 return CC_DGEUmode;
11377 gcc_unreachable ();
11380 /* The remaining cases only occur when both comparisons are the
11381 same. */
11382 case NE:
11383 gcc_assert (cond1 == cond2);
11387 gcc_assert (cond1 == cond2);
11391 gcc_assert (cond1 == cond2);
11395 gcc_assert (cond1 == cond2);
11396 return CC_DLEUmode;
11399 gcc_assert (cond1 == cond2);
11400 return CC_DGEUmode;
11403 gcc_unreachable ();
11408 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11410 /* All floating point compares return CCFP if it is an equality
11411 comparison, and CCFPE otherwise. */
11412 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11432 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11437 gcc_unreachable ();
11441 /* A compare with a shifted operand. Because of canonicalization, the
11442 comparison will have to be swapped when we emit the assembler. */
11443 if (GET_MODE (y) == SImode
11444 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11445 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11446 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11447 || GET_CODE (x) == ROTATERT))
11448 return CC_SWPmode;
11450 /* This operation is performed swapped, but since we only rely on the Z
11451 flag we don't need an additional mode. */
11452 if (GET_MODE (y) == SImode
11453 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11454 && GET_CODE (x) == NEG
11455 && (op == EQ || op == NE))
11456 return CC_Zmode;
11458 /* This is a special case that is used by combine to allow a
11459 comparison of a shifted byte load to be split into a zero-extend
11460 followed by a comparison of the shifted integer (only valid for
11461 equalities and unsigned inequalities). */
11462 if (GET_MODE (x) == SImode
11463 && GET_CODE (x) == ASHIFT
11464 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11465 && GET_CODE (XEXP (x, 0)) == SUBREG
11466 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11467 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11468 && (op == EQ || op == NE
11469 || op == GEU || op == GTU || op == LTU || op == LEU)
11470 && GET_CODE (y) == CONST_INT)
11473 /* A construct for a conditional compare, if the false arm contains
11474 0, then both conditions must be true, otherwise either condition
11475 must be true. Not all conditions are possible, so CCmode is
11476 returned if it can't be done. */
11477 if (GET_CODE (x) == IF_THEN_ELSE
11478 && (XEXP (x, 2) == const0_rtx
11479 || XEXP (x, 2) == const1_rtx)
11480 && COMPARISON_P (XEXP (x, 0))
11481 && COMPARISON_P (XEXP (x, 1)))
11482 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11483 INTVAL (XEXP (x, 2)));
11485 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11486 if (GET_CODE (x) == AND
11487 && (op == EQ || op == NE)
11488 && COMPARISON_P (XEXP (x, 0))
11489 && COMPARISON_P (XEXP (x, 1)))
11490 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11493 if (GET_CODE (x) == IOR
11494 && (op == EQ || op == NE)
11495 && COMPARISON_P (XEXP (x, 0))
11496 && COMPARISON_P (XEXP (x, 1)))
11497 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11500 /* An operation (on Thumb) where we want to test for a single bit.
11501 This is done by shifting that bit up into the top bit of a
11502 scratch register; we can then branch on the sign bit. */
11503 if (TARGET_THUMB1
11504 && GET_MODE (x) == SImode
11505 && (op == EQ || op == NE)
11506 && GET_CODE (x) == ZERO_EXTRACT
11507 && XEXP (x, 1) == const1_rtx)
11508 return CC_Nmode;
11510 /* An operation that sets the condition codes as a side-effect, the
11511 V flag is not set correctly, so we can only use comparisons where
11512 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11513 respectively.) */
11514 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11515 if (GET_MODE (x) == SImode
11517 && (op == EQ || op == NE || op == LT || op == GE)
11518 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11519 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11520 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11521 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11522 || GET_CODE (x) == LSHIFTRT
11523 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11524 || GET_CODE (x) == ROTATERT
11525 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11526 return CC_NOOVmode;
11528 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11531 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11532 && GET_CODE (x) == PLUS
11533 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11534 return CC_Cmode;
11536 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11538 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11539 available. */
11540 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11547 /* A DImode comparison against zero can be implemented by
11548 or'ing the two halves together. */
11549 if (y == const0_rtx)
11552 /* We can do an equality test in three Thumb instructions. */
11562 /* DImode unsigned comparisons can be implemented by cmp +
11563 cmpeq without a scratch register. Not worth doing in
11564 Thumb-2. */
11574 /* DImode signed and unsigned comparisons can be implemented
11575 by cmp + sbcs with a scratch register, but that does not
11576 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11577 gcc_assert (op != EQ && op != NE);
11581 gcc_unreachable ();
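/* Illustrative sketch, not part of the original file: the ZERO_EXTRACT
   single-bit test noted above shifts the selected bit into the sign bit
   so that a branch on N suffices.  A C model of the same idea:  */
#if 0
static int
model_test_bit (unsigned int x, unsigned int k)	/* 0 <= k <= 31 */
{
  /* lsls tmp, x, #(31 - k) ; bmi / bpl -- the sign of the shifted value
     is exactly bit K of the original.  */
  return (int) (x << (31 - k)) < 0;
}
#endif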
11588 /* X and Y are two things to compare using CODE. Emit the compare insn and
11589 return the rtx for register 0 in the proper mode. FP means this is a
11590 floating point compare: I don't think that it is needed on the arm. */
11592 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
11594 enum machine_mode mode;
11596 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11598 /* We might have X as a constant, Y as a register because of the predicates
11599 used for cmpdi. If so, force X to a register here. */
11600 if (dimode_comparison && !REG_P (x))
11601 x = force_reg (DImode, x);
11603 mode = SELECT_CC_MODE (code, x, y);
11604 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11606 if (dimode_comparison
11607 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11608 && mode != CC_CZmode)
11612 /* To compare two non-zero values for equality, XOR them and
11613 then compare against zero. Not used for ARM mode; there
11614 CC_CZmode is cheaper. */
11615 if (mode == CC_Zmode && y != const0_rtx)
11617 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11618 y = const0_rtx;
11620 /* A scratch register is required. */
11621 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11622 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11623 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11626 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11628 return cc_reg;
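/* Illustrative note: the CC_Zmode path above relies on the identity
   (x ^ y) == 0 <=> x == y, so a 64-bit equality against a non-zero value
   becomes an XOR followed by a compare against zero:  */
#if 0
static int
model_di_equal (unsigned long long x, unsigned long long y)
{
  return (x ^ y) == 0;		/* what the emitted xor + compare computes */
}
#endif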
11631 /* Generate a sequence of insns that will generate the correct return
11632 address mask depending on the physical architecture that the program
11635 arm_gen_return_addr_mask (void)
11637 rtx reg = gen_reg_rtx (Pmode);
11639 emit_insn (gen_return_addr_mask (reg));
11640 return reg;
11644 arm_reload_in_hi (rtx *operands)
11646 rtx ref = operands[1];
11648 HOST_WIDE_INT offset = 0;
11650 if (GET_CODE (ref) == SUBREG)
11652 offset = SUBREG_BYTE (ref);
11653 ref = SUBREG_REG (ref);
11656 if (GET_CODE (ref) == REG)
11658 /* We have a pseudo which has been spilt onto the stack; there
11659 are two cases here: the first where there is a simple
11660 stack-slot replacement and a second where the stack-slot is
11661 out of range, or is used as a subreg. */
11662 if (reg_equiv_mem (REGNO (ref)))
11664 ref = reg_equiv_mem (REGNO (ref));
11665 base = find_replacement (&XEXP (ref, 0));
11668 /* The slot is out of range, or was dressed up in a SUBREG. */
11669 base = reg_equiv_address (REGNO (ref));
11672 base = find_replacement (&XEXP (ref, 0));
11674 /* Handle the case where the address is too complex to be offset by 1. */
11675 if (GET_CODE (base) == MINUS
11676 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11678 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11680 emit_set_insn (base_plus, base);
11681 base = base_plus;
11683 else if (GET_CODE (base) == PLUS)
11685 /* The addend must be CONST_INT, or we would have dealt with it above. */
11686 HOST_WIDE_INT hi, lo;
11688 offset += INTVAL (XEXP (base, 1));
11689 base = XEXP (base, 0);
11691 /* Rework the address into a legal sequence of insns. */
11692 /* Valid range for lo is -4095 -> 4095 */
11693 lo = (offset >= 0
11694 ? (offset & 0xfff)
11695 : -((-offset) & 0xfff));
11697 /* Corner case, if lo is the max offset then we would be out of range
11698 once we have added the additional 1 below, so bump the msb into the
11699 pre-loading insn(s). */
11700 if (lo == 4095)
11701 lo &= 0x7ff;
11703 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11704 ^ (HOST_WIDE_INT) 0x80000000)
11705 - (HOST_WIDE_INT) 0x80000000);
11707 gcc_assert (hi + lo == offset);
11711 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11713 /* Get the base address; addsi3 knows how to handle constants
11714 that require more than one insn. */
11715 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11716 base = base_plus;
11717 offset = lo;
11721 /* Operands[2] may overlap operands[0] (though it won't overlap
11722 operands[1]), that's why we asked for a DImode reg -- so we can
11723 use the bit that does not overlap. */
11724 if (REGNO (operands[2]) == REGNO (operands[0]))
11725 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11727 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11729 emit_insn (gen_zero_extendqisi2 (scratch,
11730 gen_rtx_MEM (QImode,
11731 plus_constant (base, offset))));
11733 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11734 gen_rtx_MEM (QImode,
11735 plus_constant (base, offset + 1))));
11737 if (!BYTES_BIG_ENDIAN)
11738 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11739 gen_rtx_IOR (SImode,
11742 gen_rtx_SUBREG (SImode, operands[0], 0),
11746 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11747 gen_rtx_IOR (SImode,
11748 gen_rtx_ASHIFT (SImode, scratch,
11750 gen_rtx_SUBREG (SImode, operands[0], 0)));
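/* Worked example, not part of the original file: the offset splitting
   used above (and again in arm_reload_out_hi).  LO keeps the
   sign-preserving low 12 bits, HI takes the remainder, and LO == 4095 is
   trimmed so that the extra +1 for the second byte cannot leave the
   -4095..4095 addressing range.  Assumes the offset fits in 32 bits, so
   plain subtraction models the masked sign-extension above.  */
#if 0
#include <assert.h>

static void
model_split_offset (long offset)
{
  long lo = (offset >= 0
	     ? (offset & 0xfff)
	     : -((-offset) & 0xfff));
  long hi;

  if (lo == 4095)		/* the corner case noted above */
    lo &= 0x7ff;
  hi = offset - lo;

  assert (hi + lo == offset);
  assert (lo >= -4095 && lo <= 4094);	/* lo + 1 stays addressable */
}
#endif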
11753 /* Handle storing a half-word to memory during reload by synthesizing as two
11754 byte stores. Take care not to clobber the input values until after we
11755 have moved them somewhere safe. This code assumes that if the DImode
11756 scratch in operands[2] overlaps either the input value or output address
11757 in some way, then that value must die in this insn (we absolutely need
11758 two scratch registers for some corner cases). */
11760 arm_reload_out_hi (rtx *operands)
11762 rtx ref = operands[0];
11763 rtx outval = operands[1];
11765 HOST_WIDE_INT offset = 0;
11767 if (GET_CODE (ref) == SUBREG)
11769 offset = SUBREG_BYTE (ref);
11770 ref = SUBREG_REG (ref);
11773 if (GET_CODE (ref) == REG)
11775 /* We have a pseudo which has been spilt onto the stack; there
11776 are two cases here: the first where there is a simple
11777 stack-slot replacement and a second where the stack-slot is
11778 out of range, or is used as a subreg. */
11779 if (reg_equiv_mem (REGNO (ref)))
11781 ref = reg_equiv_mem (REGNO (ref));
11782 base = find_replacement (&XEXP (ref, 0));
11785 /* The slot is out of range, or was dressed up in a SUBREG. */
11786 base = reg_equiv_address (REGNO (ref));
11789 base = find_replacement (&XEXP (ref, 0));
11791 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11793 /* Handle the case where the address is too complex to be offset by 1. */
11794 if (GET_CODE (base) == MINUS
11795 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11797 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11799 /* Be careful not to destroy OUTVAL. */
11800 if (reg_overlap_mentioned_p (base_plus, outval))
11802 /* Updating base_plus might destroy outval, see if we can
11803 swap the scratch and base_plus. */
11804 if (!reg_overlap_mentioned_p (scratch, outval))
11807 scratch = base_plus;
11812 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11814 /* Be conservative and copy OUTVAL into the scratch now,
11815 this should only be necessary if outval is a subreg
11816 of something larger than a word. */
11817 /* XXX Might this clobber base? I can't see how it can,
11818 since scratch is known to overlap with OUTVAL, and
11819 must be wider than a word. */
11820 emit_insn (gen_movhi (scratch_hi, outval));
11821 outval = scratch_hi;
11825 emit_set_insn (base_plus, base);
11826 base = base_plus;
11828 else if (GET_CODE (base) == PLUS)
11830 /* The addend must be CONST_INT, or we would have dealt with it above. */
11831 HOST_WIDE_INT hi, lo;
11833 offset += INTVAL (XEXP (base, 1));
11834 base = XEXP (base, 0);
11836 /* Rework the address into a legal sequence of insns. */
11837 /* Valid range for lo is -4095 -> 4095 */
11838 lo = (offset >= 0
11839 ? (offset & 0xfff)
11840 : -((-offset) & 0xfff));
11842 /* Corner case, if lo is the max offset then we would be out of range
11843 once we have added the additional 1 below, so bump the msb into the
11844 pre-loading insn(s). */
11845 if (lo == 4095)
11846 lo &= 0x7ff;
11848 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11849 ^ (HOST_WIDE_INT) 0x80000000)
11850 - (HOST_WIDE_INT) 0x80000000);
11852 gcc_assert (hi + lo == offset);
11856 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11858 /* Be careful not to destroy OUTVAL. */
11859 if (reg_overlap_mentioned_p (base_plus, outval))
11861 /* Updating base_plus might destroy outval, see if we
11862 can swap the scratch and base_plus. */
11863 if (!reg_overlap_mentioned_p (scratch, outval))
11866 scratch = base_plus;
11871 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11873 /* Be conservative and copy outval into scratch now,
11874 this should only be necessary if outval is a
11875 subreg of something larger than a word. */
11876 /* XXX Might this clobber base? I can't see how it
11877 can, since scratch is known to overlap with
11878 outval. */
11879 emit_insn (gen_movhi (scratch_hi, outval));
11880 outval = scratch_hi;
11884 /* Get the base address; addsi3 knows how to handle constants
11885 that require more than one insn. */
11886 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11887 base = base_plus;
11888 offset = lo;
11892 if (BYTES_BIG_ENDIAN)
11894 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11895 plus_constant (base, offset + 1)),
11896 gen_lowpart (QImode, outval)));
11897 emit_insn (gen_lshrsi3 (scratch,
11898 gen_rtx_SUBREG (SImode, outval, 0),
11899 GEN_INT (8)));
11900 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11901 gen_lowpart (QImode, scratch)));
11905 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11906 gen_lowpart (QImode, outval)));
11907 emit_insn (gen_lshrsi3 (scratch,
11908 gen_rtx_SUBREG (SImode, outval, 0),
11909 GEN_INT (8)));
11910 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11911 plus_constant (base, offset + 1)),
11912 gen_lowpart (QImode, scratch)));
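/* Illustrative model of the two byte stores just emitted: little-endian
   puts the low byte of OUTVAL at BASE+OFFSET and the high byte at
   BASE+OFFSET+1; big-endian reverses that, matching the two branches of
   the if above.  */
#if 0
#include <stdint.h>

static void
model_store_hi (uint8_t *base, long offset, uint16_t outval, int big_endian)
{
  uint8_t lo = (uint8_t) outval;		/* gen_lowpart (QImode, outval) */
  uint8_t hi = (uint8_t) (outval >> 8);		/* scratch after gen_lshrsi3 */

  base[offset] = big_endian ? hi : lo;
  base[offset + 1] = big_endian ? lo : hi;
}
#endif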
11916 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11917 (padded to the size of a word) should be passed in a register. */
11920 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11922 if (TARGET_AAPCS_BASED)
11923 return must_pass_in_stack_var_size (mode, type);
11925 return must_pass_in_stack_var_size_or_pad (mode, type);
11929 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11930 Return true if an argument passed on the stack should be padded upwards,
11931 i.e. if the least-significant byte has useful data.
11932 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11933 aggregate types are placed in the lowest memory address. */
11936 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
11938 if (!TARGET_AAPCS_BASED)
11939 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
11941 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11948 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11949 Return !BYTES_BIG_ENDIAN if the least significant byte of the
11950 register has useful data, and return the opposite if the most
11951 significant byte does. */
11954 arm_pad_reg_upward (enum machine_mode mode,
11955 tree type, int first ATTRIBUTE_UNUSED)
11957 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
11959 /* For AAPCS, small aggregates, small fixed-point types,
11960 and small complex types are always padded upwards. */
11963 if ((AGGREGATE_TYPE_P (type)
11964 || TREE_CODE (type) == COMPLEX_TYPE
11965 || FIXED_POINT_TYPE_P (type))
11966 && int_size_in_bytes (type) <= 4)
11971 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
11972 && GET_MODE_SIZE (mode) <= 4)
11977 /* Otherwise, use default padding. */
11978 return !BYTES_BIG_ENDIAN;
11982 /* Print a symbolic form of X to the debug file, F. */
11984 arm_print_value (FILE *f, rtx x)
11986 switch (GET_CODE (x))
11989 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11993 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12001 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12003 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12004 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12012 fprintf (f, "\"%s\"", XSTR (x, 0));
12016 fprintf (f, "`%s'", XSTR (x, 0));
12020 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12024 arm_print_value (f, XEXP (x, 0));
12028 arm_print_value (f, XEXP (x, 0));
12030 arm_print_value (f, XEXP (x, 1));
12038 fprintf (f, "????");
12043 /* Routines for manipulation of the constant pool. */
12045 /* Arm instructions cannot load a large constant directly into a
12046 register; they have to come from a pc relative load. The constant
12047 must therefore be placed in the addressable range of the pc
12048 relative load. Depending on the precise pc relative load
12049 instruction the range is somewhere between 256 bytes and 4k. This
12050 means that we often have to dump a constant inside a function, and
12051 generate code to branch around it.
12053 It is important to minimize this, since the branches will slow
12054 things down and make the code larger.
12056 Normally we can hide the table after an existing unconditional
12057 branch so that there is no interruption of the flow, but in the
12058 worst case the code looks like this:
12076 We fix this by performing a scan after scheduling, which notices
12077 which instructions need to have their operands fetched from the
12078 constant table and builds the table.
12080 The algorithm starts by building a table of all the constants that
12081 need fixing up and all the natural barriers in the function (places
12082 where a constant table can be dropped without breaking the flow).
12083 For each fixup we note how far the pc-relative replacement will be
12084 able to reach and the offset of the instruction into the function.
12086 Having built the table we then group the fixes together to form
12087 tables that are as large as possible (subject to addressing
12088 constraints) and emit each table of constants after the last
12089 barrier that is within range of all the instructions in the group.
12090 If a group does not contain a barrier, then we forcibly create one
12091 by inserting a jump instruction into the flow. Once the table has
12092 been inserted, the insns are then modified to reference the
12093 relevant entry in the pool.
12095 Possible enhancements to the algorithm (not implemented) are:
12097 1) For some processors and object formats, there may be benefit in
12098 aligning the pools to the start of cache lines; this alignment
12099 would need to be taken into account when calculating addressability
12100 of a minipool. */
12102 /* These typedefs are located at the start of this file, so that
12103 they can be used in the prototypes there. This comment is to
12104 remind readers of that fact so that the following structures
12105 can be understood more easily.
12107 typedef struct minipool_node Mnode;
12108 typedef struct minipool_fixup Mfix; */
12110 struct minipool_node
12112 /* Doubly linked chain of entries. */
12115 /* The maximum offset into the code that this entry can be placed. While
12116 pushing fixes for forward references, all entries are sorted in order
12117 of increasing max_address. */
12118 HOST_WIDE_INT max_address;
12119 /* Similarly for an entry inserted for a backwards ref. */
12120 HOST_WIDE_INT min_address;
12121 /* The number of fixes referencing this entry. This can become zero
12122 if we "unpush" an entry. In this case we ignore the entry when we
12123 come to emit the code. */
12125 /* The offset from the start of the minipool. */
12126 HOST_WIDE_INT offset;
12127 /* The value in the table. */
12129 /* The mode of value. */
12130 enum machine_mode mode;
12131 /* The size of the value. With iWMMXt enabled,
12132 sizes > 4 also imply an alignment of 8 bytes. */
12136 struct minipool_fixup
12140 HOST_WIDE_INT address;
12142 enum machine_mode mode;
12146 HOST_WIDE_INT forwards;
12147 HOST_WIDE_INT backwards;
12150 /* Fixes less than a word need padding out to a word boundary. */
12151 #define MINIPOOL_FIX_SIZE(mode) \
12152 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
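/* Examples: MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both yield 4 -- the word padding described above -- while SImode gives
   4, DImode 8 and a 16-byte vector mode 16.  */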
12154 static Mnode * minipool_vector_head;
12155 static Mnode * minipool_vector_tail;
12156 static rtx minipool_vector_label;
12157 static int minipool_pad;
12159 /* The linked list of all minipool fixes required for this function. */
12160 Mfix * minipool_fix_head;
12161 Mfix * minipool_fix_tail;
12162 /* The fix entry for the current minipool, once it has been placed. */
12163 Mfix * minipool_barrier;
12165 /* Determines if INSN is the start of a jump table. Returns the end
12166 of the TABLE or NULL_RTX. */
12168 is_jump_table (rtx insn)
12172 if (jump_to_label_p (insn)
12173 && ((table = next_real_insn (JUMP_LABEL (insn)))
12174 == next_real_insn (insn))
12176 && GET_CODE (table) == JUMP_INSN
12177 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12178 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12179 return table;
12181 return NULL_RTX;
12184 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12185 #define JUMP_TABLES_IN_TEXT_SECTION 0
12188 static HOST_WIDE_INT
12189 get_jump_table_size (rtx insn)
12191 /* ADDR_VECs only take room if read-only data goes into the text
12192 section. */
12193 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12195 rtx body = PATTERN (insn);
12196 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12197 HOST_WIDE_INT size;
12198 HOST_WIDE_INT modesize;
12200 modesize = GET_MODE_SIZE (GET_MODE (body));
12201 size = modesize * XVECLEN (body, elt);
12202 switch (modesize)
12204 case 1:
12205 /* Round up size of TBB table to a halfword boundary. */
12206 size = (size + 1) & ~(HOST_WIDE_INT)1;
12207 break;
12208 case 2:
12209 /* No padding necessary for TBH. */
12210 break;
12211 case 4:
12212 /* Add two bytes for alignment on Thumb. */
12213 if (TARGET_THUMB)
12214 size += 2;
12215 break;
12216 default:
12217 gcc_unreachable ();
12219 return size;
12222 return 0;
12225 /* Return the maximum amount of padding that will be inserted before
12226 LABEL. */
12228 static HOST_WIDE_INT
12229 get_label_padding (rtx label)
12231 HOST_WIDE_INT align, min_insn_size;
12233 align = 1 << label_to_alignment (label);
12234 min_insn_size = TARGET_THUMB ? 2 : 4;
12235 return align > min_insn_size ? align - min_insn_size : 0;
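/* Worked example (illustrative): a label aligned to 1 << 3 = 8 bytes on
   Thumb (min_insn_size == 2) can be preceded by at most 8 - 2 = 6 bytes
   of padding; the same label on ARM (min_insn_size == 4) costs at most
   4 bytes.  */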
12238 /* Move a minipool fix MP from its current location to before MAX_MP.
12239 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12240 constraints may need updating. */
12242 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12243 HOST_WIDE_INT max_address)
12245 /* The code below assumes these are different. */
12246 gcc_assert (mp != max_mp);
12248 if (max_mp == NULL)
12250 if (max_address < mp->max_address)
12251 mp->max_address = max_address;
12255 if (max_address > max_mp->max_address - mp->fix_size)
12256 mp->max_address = max_mp->max_address - mp->fix_size;
12258 mp->max_address = max_address;
12260 /* Unlink MP from its current position. Since max_mp is non-null,
12261 mp->prev must be non-null. */
12262 mp->prev->next = mp->next;
12263 if (mp->next != NULL)
12264 mp->next->prev = mp->prev;
12266 minipool_vector_tail = mp->prev;
12268 /* Re-insert it before MAX_MP. */
12270 mp->prev = max_mp->prev;
12273 if (mp->prev != NULL)
12274 mp->prev->next = mp;
12276 minipool_vector_head = mp;
12279 /* Save the new entry. */
12282 /* Scan over the preceding entries and adjust their addresses as
12284 while (mp->prev != NULL
12285 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12287 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12294 /* Add a constant to the minipool for a forward reference. Returns the
12295 node added or NULL if the constant will not fit in this pool. */
12297 add_minipool_forward_ref (Mfix *fix)
12299 /* If set, max_mp is the first pool_entry that has a lower
12300 constraint than the one we are trying to add. */
12301 Mnode * max_mp = NULL;
12302 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12305 /* If the minipool starts before the end of FIX->INSN then this FIX
12306 cannot be placed into the current pool. Furthermore, adding the
12307 new constant pool entry may cause the pool to start FIX_SIZE bytes
12308 earlier. */
12309 if (minipool_vector_head &&
12310 (fix->address + get_attr_length (fix->insn)
12311 >= minipool_vector_head->max_address - fix->fix_size))
12314 /* Scan the pool to see if a constant with the same value has
12315 already been added. While we are doing this, also note the
12316 location where we must insert the constant if it doesn't already
12318 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12320 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12321 && fix->mode == mp->mode
12322 && (GET_CODE (fix->value) != CODE_LABEL
12323 || (CODE_LABEL_NUMBER (fix->value)
12324 == CODE_LABEL_NUMBER (mp->value)))
12325 && rtx_equal_p (fix->value, mp->value))
12327 /* More than one fix references this entry. */
12328 mp->refcount++;
12329 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12332 /* Note the insertion point if necessary. */
12334 && mp->max_address > max_address)
12337 /* If we are inserting an 8-byte aligned quantity and
12338 we have not already found an insertion point, then
12339 make sure that all such 8-byte aligned quantities are
12340 placed at the start of the pool. */
12341 if (ARM_DOUBLEWORD_ALIGN
12343 && fix->fix_size >= 8
12344 && mp->fix_size < 8)
12347 max_address = mp->max_address;
12351 /* The value is not currently in the minipool, so we need to create
12352 a new entry for it. If MAX_MP is NULL, the entry will be put on
12353 the end of the list since the placement is less constrained than
12354 any existing entry. Otherwise, we insert the new fix before
12355 MAX_MP and, if necessary, adjust the constraints on the other
12358 mp->fix_size = fix->fix_size;
12359 mp->mode = fix->mode;
12360 mp->value = fix->value;
12362 /* Not yet required for a backwards ref. */
12363 mp->min_address = -65536;
12365 if (max_mp == NULL)
12367 mp->max_address = max_address;
12369 mp->prev = minipool_vector_tail;
12371 if (mp->prev == NULL)
12373 minipool_vector_head = mp;
12374 minipool_vector_label = gen_label_rtx ();
12377 mp->prev->next = mp;
12379 minipool_vector_tail = mp;
12383 if (max_address > max_mp->max_address - mp->fix_size)
12384 mp->max_address = max_mp->max_address - mp->fix_size;
12386 mp->max_address = max_address;
12389 mp->prev = max_mp->prev;
12391 if (mp->prev != NULL)
12392 mp->prev->next = mp;
12394 minipool_vector_head = mp;
12397 /* Save the new entry. */
12398 max_mp = mp;
12400 /* Scan over the preceding entries and adjust their addresses as
12402 while (mp->prev != NULL
12403 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12405 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12406 mp = mp->prev;
12409 return max_mp;
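/* Illustrative sketch, not part of the original file: the doubly-linked
   insert-before operation used by the two functions above, with NODE as
   a stand-in for Mnode.  The head pointer is updated when the insertion
   point was the first element.  */
#if 0
struct node { struct node *prev, *next; };

static void
model_insert_before (struct node **head, struct node *pos, struct node *mp)
{
  mp->next = pos;
  mp->prev = pos->prev;
  pos->prev = mp;

  if (mp->prev != NULL)
    mp->prev->next = mp;
  else
    *head = mp;		/* POS was the list head */
}
#endif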
12413 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12414 HOST_WIDE_INT min_address)
12416 HOST_WIDE_INT offset;
12418 /* The code below assumes these are different. */
12419 gcc_assert (mp != min_mp);
12421 if (min_mp == NULL)
12423 if (min_address > mp->min_address)
12424 mp->min_address = min_address;
12428 /* We will adjust this below if it is too loose. */
12429 mp->min_address = min_address;
12431 /* Unlink MP from its current position. Since min_mp is non-null,
12432 mp->next must be non-null. */
12433 mp->next->prev = mp->prev;
12434 if (mp->prev != NULL)
12435 mp->prev->next = mp->next;
12437 minipool_vector_head = mp->next;
12439 /* Reinsert it after MIN_MP. */
12441 mp->next = min_mp->next;
12443 if (mp->next != NULL)
12444 mp->next->prev = mp;
12446 minipool_vector_tail = mp;
12452 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12454 mp->offset = offset;
12455 if (mp->refcount > 0)
12456 offset += mp->fix_size;
12458 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12459 mp->next->min_address = mp->min_address + mp->fix_size;
12465 /* Add a constant to the minipool for a backward reference. Returns the
12466 node added or NULL if the constant will not fit in this pool.
12468 Note that the code for insertion for a backwards reference can be
12469 somewhat confusing because the calculated offsets for each fix do
12470 not take into account the size of the pool (which is still under
12471 construction). */
12473 add_minipool_backward_ref (Mfix *fix)
12475 /* If set, min_mp is the last pool_entry that has a lower constraint
12476 than the one we are trying to add. */
12477 Mnode *min_mp = NULL;
12478 /* This can be negative, since it is only a constraint. */
12479 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12482 /* If we can't reach the current pool from this insn, or if we can't
12483 insert this entry at the end of the pool without pushing other
12484 fixes out of range, then we don't try. This ensures that we
12485 can't fail later on. */
12486 if (min_address >= minipool_barrier->address
12487 || (minipool_vector_tail->min_address + fix->fix_size
12488 >= minipool_barrier->address))
12489 return NULL;
12491 /* Scan the pool to see if a constant with the same value has
12492 already been added. While we are doing this, also note the
12493 location where we must insert the constant if it doesn't already
12495 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12497 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12498 && fix->mode == mp->mode
12499 && (GET_CODE (fix->value) != CODE_LABEL
12500 || (CODE_LABEL_NUMBER (fix->value)
12501 == CODE_LABEL_NUMBER (mp->value)))
12502 && rtx_equal_p (fix->value, mp->value)
12503 /* Check that there is enough slack to move this entry to the
12504 end of the table (this is conservative). */
12505 && (mp->max_address
12506 > (minipool_barrier->address
12507 + minipool_vector_tail->offset
12508 + minipool_vector_tail->fix_size)))
12510 mp->refcount++;
12511 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12514 if (min_mp != NULL)
12515 mp->min_address += fix->fix_size;
12518 /* Note the insertion point if necessary. */
12519 if (mp->min_address < min_address)
12521 /* For now, we do not allow the insertion of 8-byte alignment
12522 requiring nodes anywhere but at the start of the pool. */
12523 if (ARM_DOUBLEWORD_ALIGN
12524 && fix->fix_size >= 8 && mp->fix_size < 8)
12529 else if (mp->max_address
12530 < minipool_barrier->address + mp->offset + fix->fix_size)
12532 /* Inserting before this entry would push the fix beyond
12533 its maximum address (which can happen if we have
12534 re-located a forwards fix); force the new fix to come
12536 if (ARM_DOUBLEWORD_ALIGN
12537 && fix->fix_size >= 8 && mp->fix_size < 8)
12542 min_address = mp->min_address + fix->fix_size;
12545 /* Do not insert a non-8-byte aligned quantity before 8-byte
12546 aligned quantities. */
12547 else if (ARM_DOUBLEWORD_ALIGN
12548 && fix->fix_size < 8
12549 && mp->fix_size >= 8)
12552 min_address = mp->min_address + fix->fix_size;
12557 /* We need to create a new entry. */
12559 mp->fix_size = fix->fix_size;
12560 mp->mode = fix->mode;
12561 mp->value = fix->value;
12563 mp->max_address = minipool_barrier->address + 65536;
12565 mp->min_address = min_address;
12567 if (min_mp == NULL)
12570 mp->next = minipool_vector_head;
12572 if (mp->next == NULL)
12574 minipool_vector_tail = mp;
12575 minipool_vector_label = gen_label_rtx ();
12578 mp->next->prev = mp;
12580 minipool_vector_head = mp;
12584 mp->next = min_mp->next;
12588 if (mp->next != NULL)
12589 mp->next->prev = mp;
12591 minipool_vector_tail = mp;
12594 /* Save the new entry. */
12602 /* Scan over the following entries and adjust their offsets. */
12603 while (mp->next != NULL)
12605 if (mp->next->min_address < mp->min_address + mp->fix_size)
12606 mp->next->min_address = mp->min_address + mp->fix_size;
12609 mp->next->offset = mp->offset + mp->fix_size;
12611 mp->next->offset = mp->offset;
12620 assign_minipool_offsets (Mfix *barrier)
12622 HOST_WIDE_INT offset = 0;
12625 minipool_barrier = barrier;
12627 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12629 mp->offset = offset;
12631 if (mp->refcount > 0)
12632 offset += mp->fix_size;
12636 /* Output the literal table */
12638 dump_minipool (rtx scan)
12644 if (ARM_DOUBLEWORD_ALIGN)
12645 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12646 if (mp->refcount > 0 && mp->fix_size >= 8)
12653 fprintf (dump_file,
12654 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12655 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12657 scan = emit_label_after (gen_label_rtx (), scan);
12658 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12659 scan = emit_label_after (minipool_vector_label, scan);
12661 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12663 if (mp->refcount > 0)
12667 fprintf (dump_file,
12668 ";; Offset %u, min %ld, max %ld ",
12669 (unsigned) mp->offset, (unsigned long) mp->min_address,
12670 (unsigned long) mp->max_address);
12671 arm_print_value (dump_file, mp->value);
12672 fputc ('\n', dump_file);
12675 switch (mp->fix_size)
12677 #ifdef HAVE_consttable_1
12679 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12683 #ifdef HAVE_consttable_2
12685 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12689 #ifdef HAVE_consttable_4
12691 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12695 #ifdef HAVE_consttable_8
12697 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12701 #ifdef HAVE_consttable_16
12703 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12708 gcc_unreachable ();
12716 minipool_vector_head = minipool_vector_tail = NULL;
12717 scan = emit_insn_after (gen_consttable_end (), scan);
12718 scan = emit_barrier_after (scan);
12721 /* Return the cost of forcibly inserting a barrier after INSN. */
12723 arm_barrier_cost (rtx insn)
12725 /* Basing the location of the pool on the loop depth is preferable,
12726 but at the moment, the basic block information seems to be
12727 corrupted by this stage of the compilation. */
12728 int base_cost = 50;
12729 rtx next = next_nonnote_insn (insn);
12731 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12734 switch (GET_CODE (insn))
12737 /* It will always be better to place the table before the label, rather
12738 than after it. */
12746 return base_cost - 10;
12749 return base_cost + 10;
12753 /* Find the best place in the insn stream in the range
12754 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12755 Create the barrier by inserting a jump and add a new fix entry for
12758 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12760 HOST_WIDE_INT count = 0;
12762 rtx from = fix->insn;
12763 /* The instruction after which we will insert the jump. */
12764 rtx selected = NULL;
12766 /* The address at which the jump instruction will be placed. */
12767 HOST_WIDE_INT selected_address;
12769 HOST_WIDE_INT max_count = max_address - fix->address;
12770 rtx label = gen_label_rtx ();
12772 selected_cost = arm_barrier_cost (from);
12773 selected_address = fix->address;
12775 while (from && count < max_count)
12780 /* This code shouldn't have been called if there was a natural barrier
12782 gcc_assert (GET_CODE (from) != BARRIER);
12784 /* Count the length of this insn. This must stay in sync with the
12785 code that pushes minipool fixes. */
12786 if (LABEL_P (from))
12787 count += get_label_padding (from);
12789 count += get_attr_length (from);
12791 /* If there is a jump table, add its length. */
12792 tmp = is_jump_table (from);
12795 count += get_jump_table_size (tmp);
12797 /* Jump tables aren't in a basic block, so base the cost on
12798 the dispatch insn. If we select this location, we will
12799 still put the pool after the table. */
12800 new_cost = arm_barrier_cost (from);
12802 if (count < max_count
12803 && (!selected || new_cost <= selected_cost))
12806 selected_cost = new_cost;
12807 selected_address = fix->address + count;
12810 /* Continue after the dispatch table. */
12811 from = NEXT_INSN (tmp);
12815 new_cost = arm_barrier_cost (from);
12817 if (count < max_count
12818 && (!selected || new_cost <= selected_cost))
12821 selected_cost = new_cost;
12822 selected_address = fix->address + count;
12825 from = NEXT_INSN (from);
12828 /* Make sure that we found a place to insert the jump. */
12829 gcc_assert (selected);
12831 /* Make sure we do not split a call and its corresponding
12832 CALL_ARG_LOCATION note. */
12833 if (CALL_P (selected))
12835 rtx next = NEXT_INSN (selected);
12836 if (next && NOTE_P (next)
12837 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12838 selected = next;
12841 /* Create a new JUMP_INSN that branches around a barrier. */
12842 from = emit_jump_insn_after (gen_jump (label), selected);
12843 JUMP_LABEL (from) = label;
12844 barrier = emit_barrier_after (from);
12845 emit_label_after (label, barrier);
12847 /* Create a minipool barrier entry for the new barrier. */
12848 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12849 new_fix->insn = barrier;
12850 new_fix->address = selected_address;
12851 new_fix->next = fix->next;
12852 fix->next = new_fix;
12854 return new_fix;
12857 /* Record that there is a natural barrier in the insn stream at
12858 ADDRESS. */
12860 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12862 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12865 fix->address = address;
12868 if (minipool_fix_head != NULL)
12869 minipool_fix_tail->next = fix;
12871 minipool_fix_head = fix;
12873 minipool_fix_tail = fix;
12876 /* Record INSN, which will need fixing up to load a value from the
12877 minipool. ADDRESS is the offset of the insn since the start of the
12878 function; LOC is a pointer to the part of the insn which requires
12879 fixing; VALUE is the constant that must be loaded, which is of type
12880 MODE. */
12882 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12883 enum machine_mode mode, rtx value)
12885 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12888 fix->address = address;
12891 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12892 fix->value = value;
12893 fix->forwards = get_attr_pool_range (insn);
12894 fix->backwards = get_attr_neg_pool_range (insn);
12895 fix->minipool = NULL;
12897 /* If an insn doesn't have a range defined for it, then it isn't
12898 expecting to be reworked by this code. Better to stop now than
12899 to generate duff assembly code. */
12900 gcc_assert (fix->forwards || fix->backwards);
12902 /* If an entry requires 8-byte alignment then assume all constant pools
12903 require 4 bytes of padding. Trying to do this later on a per-pool
12904 basis is awkward because existing pool entries have to be modified. */
12905 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12910 fprintf (dump_file,
12911 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12912 GET_MODE_NAME (mode),
12913 INSN_UID (insn), (unsigned long) address,
12914 -1 * (long)fix->backwards, (long)fix->forwards);
12915 arm_print_value (dump_file, fix->value);
12916 fprintf (dump_file, "\n");
12919 /* Add it to the chain of fixes. */
12922 if (minipool_fix_head != NULL)
12923 minipool_fix_tail->next = fix;
12925 minipool_fix_head = fix;
12927 minipool_fix_tail = fix;
12930 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12931 Returns the number of insns needed, or 99 if we don't know how to
12932 generate it. */
12934 arm_const_double_inline_cost (rtx val)
12936 rtx lowpart, highpart;
12937 enum machine_mode mode;
12939 mode = GET_MODE (val);
12941 if (mode == VOIDmode)
12942 mode = DImode;
12944 gcc_assert (GET_MODE_SIZE (mode) == 8);
12946 lowpart = gen_lowpart (SImode, val);
12947 highpart = gen_highpart_mode (SImode, mode, val);
12949 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12950 gcc_assert (GET_CODE (highpart) == CONST_INT);
12952 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12953 NULL_RTX, NULL_RTX, 0, 0)
12954 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12955 NULL_RTX, NULL_RTX, 0, 0));
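/* Worked example (illustrative): for the 64-bit constant
   0x000000ff000000ff each 32-bit half is 0xff, a valid ARM immediate, so
   the sum above is 1 + 1 = 2 insns; a constant whose halves each need
   several data-processing insns costs proportionally more and is better
   left to the constant pool.  */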
12958 /* Return true if it is worthwhile to split a 64-bit constant into two
12959 32-bit operations. This is the case if optimizing for size, or
12960 if we have load delay slots, or if one 32-bit part can be done with
12961 a single data operation. */
12963 arm_const_double_by_parts (rtx val)
12965 enum machine_mode mode = GET_MODE (val);
12968 if (optimize_size || arm_ld_sched)
12971 if (mode == VOIDmode)
12974 part = gen_highpart_mode (SImode, mode, val);
12976 gcc_assert (GET_CODE (part) == CONST_INT);
12978 if (const_ok_for_arm (INTVAL (part))
12979 || const_ok_for_arm (~INTVAL (part)))
12982 part = gen_lowpart (SImode, val);
12984 gcc_assert (GET_CODE (part) == CONST_INT);
12986 if (const_ok_for_arm (INTVAL (part))
12987 || const_ok_for_arm (~INTVAL (part)))
12993 /* Return true if it is possible to inline both the high and low parts
12994 of a 64-bit constant into 32-bit data processing instructions. */
12996 arm_const_double_by_immediates (rtx val)
12998 enum machine_mode mode = GET_MODE (val);
13001 if (mode == VOIDmode)
13004 part = gen_highpart_mode (SImode, mode, val);
13006 gcc_assert (GET_CODE (part) == CONST_INT);
13008 if (!const_ok_for_arm (INTVAL (part)))
13011 part = gen_lowpart (SImode, val);
13013 gcc_assert (GET_CODE (part) == CONST_INT);
13015 if (!const_ok_for_arm (INTVAL (part)))
13021 /* Scan INSN and note any of its operands that need fixing.
13022 If DO_PUSHES is false we do not actually push any of the fixups
13023 needed. The function returns TRUE if any fixups were needed/pushed.
13024 This is used by arm_memory_load_p() which needs to know about loads
13025 of constants that will be converted into minipool loads. */
13027 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13029 bool result = false;
13032 extract_insn (insn);
13034 if (!constrain_operands (1))
13035 fatal_insn_not_found (insn);
13037 if (recog_data.n_alternatives == 0)
13040 /* Fill in recog_op_alt with information about the constraints of
13041 this insn. */
13042 preprocess_constraints ();
13044 for (opno = 0; opno < recog_data.n_operands; opno++)
13046 /* Things we need to fix can only occur in inputs. */
13047 if (recog_data.operand_type[opno] != OP_IN)
13050 /* If this alternative is a memory reference, then any mention
13051 of constants in this alternative is really to fool reload
13052 into allowing us to accept one there. We need to fix them up
13053 now so that we output the right code. */
13054 if (recog_op_alt[opno][which_alternative].memory_ok)
13056 rtx op = recog_data.operand[opno];
13058 if (CONSTANT_P (op))
13061 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13062 recog_data.operand_mode[opno], op);
13065 else if (GET_CODE (op) == MEM
13066 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13067 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13071 rtx cop = avoid_constant_pool_reference (op);
13073 /* Casting the address of something to a mode narrower
13074 than a word can cause avoid_constant_pool_reference()
13075 to return the pool reference itself. That's no good to
13076 us here. Let's just hope that we can use the
13077 constant pool value directly. */
13079 cop = get_pool_constant (XEXP (op, 0));
13081 push_minipool_fix (insn, address,
13082 recog_data.operand_loc[opno],
13083 recog_data.operand_mode[opno], cop);
13094 /* Convert instructions to their cc-clobbering variant if possible, since
13095 that allows us to use smaller encodings. */
13098 thumb2_reorg (void)
13103 INIT_REG_SET (&live);
13105 /* We are freeing block_for_insn in the toplev to keep compatibility
13106 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13107 compute_bb_for_insn ();
13114 COPY_REG_SET (&live, DF_LR_OUT (bb));
13115 df_simulate_initialize_backwards (bb, &live);
13116 FOR_BB_INSNS_REVERSE (bb, insn)
13118 if (NONJUMP_INSN_P (insn)
13119 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13121 rtx pat = PATTERN (insn);
13122 if (GET_CODE (pat) == SET
13123 && low_register_operand (XEXP (pat, 0), SImode)
13124 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13125 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13126 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13128 rtx dst = XEXP (pat, 0);
13129 rtx src = XEXP (pat, 1);
13130 rtx op0 = XEXP (src, 0);
13131 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13132 ? XEXP (src, 1) : NULL);
13134 if (rtx_equal_p (dst, op0)
13135 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13137 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13138 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13139 rtvec vec = gen_rtvec (2, pat, clobber);
13141 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13142 INSN_CODE (insn) = -1;
13144 /* We can also handle a commutative operation where the
13145 second operand matches the destination. */
13146 else if (op1 && rtx_equal_p (dst, op1))
13148 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13149 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13152 src = copy_rtx (src);
13153 XEXP (src, 0) = op1;
13154 XEXP (src, 1) = op0;
13155 pat = gen_rtx_SET (VOIDmode, dst, src);
13156 vec = gen_rtvec (2, pat, clobber);
13157 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13158 INSN_CODE (insn) = -1;
13163 if (NONDEBUG_INSN_P (insn))
13164 df_simulate_one_insn_backwards (bb, insn, &live);
13168 CLEAR_REG_SET (&live);
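/* Illustrative before/after of the transformation above, in RTL form:

     before: (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
     after:  (parallel [(set (reg:SI r0)
                             (plus:SI (reg:SI r0) (reg:SI r1)))
                        (clobber (reg:CC CC_REGNUM))])

   The flag-clobbering form can be assembled as the 16-bit
   "adds r0, r0, r1" instead of a 32-bit encoding, which is valid only
   because the backwards DF scan proved the condition codes dead at that
   point.  */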
13171 /* GCC puts the pool in the wrong place for ARM, since we can only
13172 load addresses a limited distance around the pc. We do some
13173 special munging to move the constant pool values to the correct
13174 point in the code. */
13179 HOST_WIDE_INT address = 0;
13180 Mfix * fix;
13182 if (TARGET_THUMB2)
13183 thumb2_reorg ();
13185 minipool_fix_head = minipool_fix_tail = NULL;
13187 /* The first insn must always be a note, or the code below won't
13188 scan it properly. */
13189 insn = get_insns ();
13190 gcc_assert (GET_CODE (insn) == NOTE);
13193 /* Scan all the insns and record the operands that will need fixing. */
13194 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13196 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13197 && (arm_cirrus_insn_p (insn)
13198 || GET_CODE (insn) == JUMP_INSN
13199 || arm_memory_load_p (insn)))
13200 cirrus_reorg (insn);
13202 if (GET_CODE (insn) == BARRIER)
13203 push_minipool_barrier (insn, address);
13204 else if (INSN_P (insn))
13208 note_invalid_constants (insn, address, true);
13209 address += get_attr_length (insn);
13211 /* If the insn is a vector jump, add the size of the table
13212 and skip the table. */
13213 if ((table = is_jump_table (insn)) != NULL)
13215 address += get_jump_table_size (table);
13219 else if (LABEL_P (insn))
13220 /* Add the worst-case padding due to alignment. We don't add
13221 the _current_ padding because the minipool insertions
13222 themselves might change it. */
13223 address += get_label_padding (insn);
13226 fix = minipool_fix_head;
13228 /* Now scan the fixups and perform the required changes. */
13233 Mfix * last_added_fix;
13234 Mfix * last_barrier = NULL;
13237 /* Skip any further barriers before the next fix. */
13238 while (fix && GET_CODE (fix->insn) == BARRIER)
13241 /* No more fixes. */
13245 last_added_fix = NULL;
13247 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13249 if (GET_CODE (ftmp->insn) == BARRIER)
13251 if (ftmp->address >= minipool_vector_head->max_address)
13254 last_barrier = ftmp;
13256 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13259 last_added_fix = ftmp; /* Keep track of the last fix added. */
13262 /* If we found a barrier, drop back to that; any fixes that we
13263 could have reached but come after the barrier will now go in
13264 the next mini-pool. */
13265 if (last_barrier != NULL)
13267 /* Reduce the refcount for those fixes that won't go into this pool. */
13269 for (fdel = last_barrier->next;
13270 fdel && fdel != ftmp;
13273 fdel->minipool->refcount--;
13274 fdel->minipool = NULL;
13277 ftmp = last_barrier;
13281 /* ftmp is the first fix that we can't fit into this pool and
13282 there are no natural barriers that we could use. Insert a
13283 new barrier in the code somewhere between the previous
13284 fix and this one, and arrange to jump around it. */
13285 HOST_WIDE_INT max_address;
13287 /* The last item on the list of fixes must be a barrier, so
13288 we can never run off the end of the list of fixes without
13289 last_barrier being set. */
13292 max_address = minipool_vector_head->max_address;
13293 /* Check that there isn't another fix that is in range that
13294 we couldn't fit into this pool because the pool was
13295 already too large: we need to put the pool before such an
13296 instruction. The pool itself may come just after the
13297 fix because create_fix_barrier also allows space for a
13298 jump instruction. */
13299 if (ftmp->address < max_address)
13300 max_address = ftmp->address + 1;
13302 last_barrier = create_fix_barrier (last_added_fix, max_address);
13305 assign_minipool_offsets (last_barrier);
13309 if (GET_CODE (ftmp->insn) != BARRIER
13310 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13317 /* Scan over the fixes we have identified for this pool, fixing them
13318 up and adding the constants to the pool itself. */
13319 for (this_fix = fix; this_fix && ftmp != this_fix;
13320 this_fix = this_fix->next)
13321 if (GET_CODE (this_fix->insn) != BARRIER)
13324 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13325 minipool_vector_label),
13326 this_fix->minipool->offset);
13327 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13330 dump_minipool (last_barrier->insn);
13334 /* From now on we must synthesize any constants that we can't handle
13335 directly. This can happen if the RTL gets split during final
13336 instruction generation. */
13337 after_arm_reorg = 1;
13339 /* Free the minipool memory. */
13340 obstack_free (&minipool_obstack, minipool_startobj);
13343 /* Routines to output assembly language. */
13345 /* If the rtx is one of the valid FPA immediate constants, return the
13346 string of the number. In this way we can ensure that valid double
13347 constants are generated even when cross compiling. */
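/* The values in question are the eight FPA immediate constants
   0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 0.5 and 10.0 (the entries of the
   values_fp/strings_fp tables initialized elsewhere in this file);
   e.g. a CONST_DOUBLE equal to 0.5 yields the string "0.5".  */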
13349 fp_immediate_constant (rtx x)
13354 if (!fp_consts_inited)
13357 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13358 for (i = 0; i < 8; i++)
13359 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13360 return strings_fp[i];
13362 gcc_unreachable ();
13365 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13366 static const char *
13367 fp_const_from_val (REAL_VALUE_TYPE *r)
13371 if (!fp_consts_inited)
13374 for (i = 0; i < 8; i++)
13375 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13376 return strings_fp[i];
13378 gcc_unreachable ();
13381 /* Output the operands of a LDM/STM instruction to STREAM.
13382 MASK is the ARM register set mask of which only bits 0-15 are important.
13383 REG is the base register, either the frame pointer or the stack pointer,
13384 INSTR is the possibly suffixed load or store instruction.
13385 RFE is nonzero if the instruction should also copy spsr to cpsr. */
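/* For example (illustrative): INSTR "ldmfd\t%r, ", REG = SP_REGNUM and
   MASK = (1 << 4) | (1 << 5) | (1 << LR_REGNUM) print

       ldmfd   sp, {r4, r5, lr}

   and when RFE is nonzero (PC must then be in MASK) a "^" suffix is
   appended so that SPSR is copied into CPSR on the exception return.  */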
13388 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13389 unsigned long mask, int rfe)
13392 bool not_first = FALSE;
13394 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13395 fputc ('\t', stream);
13396 asm_fprintf (stream, instr, reg);
13397 fputc ('{', stream);
13399 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13400 if (mask & (1 << i))
13403 fprintf (stream, ", ");
13405 asm_fprintf (stream, "%r", i);
13410 fprintf (stream, "}^\n");
13412 fprintf (stream, "}\n");
13416 /* Output a FLDMD instruction to STREAM.
13417 BASE is the register containing the address.
13418 REG and COUNT specify the register range.
13419 Extra registers may be added to avoid hardware bugs.
13421 We output FLDMD even for ARMv5 VFP implementations. Although
13422 FLDMD is technically not supported until ARMv6, it is believed
13423 that all VFP implementations support its use in this context. */
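/* For example (a sketch, assuming arm_arch6 so the ARM10 workaround
   below does not trigger): BASE = SP_REGNUM, REG = 4, COUNT = 2 print

       fldmfdd sp!, {d4, d5}  */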
13426 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13430 /* Workaround ARM10 VFPr1 bug. */
13431 if (count == 2 && !arm_arch6)
13438 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13439 load into multiple parts if we have to handle more than 16 registers. */
13442 vfp_output_fldmd (stream, base, reg, 16);
13443 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13447 fputc ('\t', stream);
13448 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13450 for (i = reg; i < reg + count; i++)
13453 fputs (", ", stream);
13454 asm_fprintf (stream, "d%d", i);
13456 fputs ("}\n", stream);
13461 /* Output the assembly for a store multiple. */
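/* Illustratively (a sketch): for a push of {d8, d9} the pattern built
   below becomes "fstmfdd%?\t%m0!, {%P1, d9}", which assembles to

       fstmfdd sp!, {d8, d9}  */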
13464 vfp_output_fstmd (rtx * operands)
13471 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13472 p = strlen (pattern);
13474 gcc_assert (GET_CODE (operands[1]) == REG);
13476 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13477 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13479 p += sprintf (&pattern[p], ", d%d", base + i);
13481 strcpy (&pattern[p], "}");
13483 output_asm_insn (pattern, operands);
13488 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13489 number of bytes pushed. */
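/* A sketch: on arm_arch6, vfp_emit_fstmd (FIRST_VFP_REGNUM, 2) emits
   a PARALLEL describing "fstmfdd sp!, {d0, d1}", attaches the
   REG_FRAME_RELATED_EXPR note built below for the unwinder, and is
   expected to return 16 (two doubleword registers of 8 bytes each).  */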
13492 vfp_emit_fstmd (int base_reg, int count)
13499 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13500 register pairs are stored by a store multiple insn. We avoid this
13501 by pushing an extra pair. */
13502 if (count == 2 && !arm_arch6)
13504 if (base_reg == LAST_VFP_REGNUM - 3)
13509 /* FSTMD may not store more than 16 doubleword registers at once. Split
13510 larger stores into multiple parts (up to a maximum of two, in practice). */
13515 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
13517 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13518 saved += vfp_emit_fstmd (base_reg, 16);
13522 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13523 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13525 reg = gen_rtx_REG (DFmode, base_reg);
13528 XVECEXP (par, 0, 0)
13529 = gen_rtx_SET (VOIDmode,
13532 gen_rtx_PRE_MODIFY (Pmode,
13535 (stack_pointer_rtx,
13538 gen_rtx_UNSPEC (BLKmode,
13539 gen_rtvec (1, reg),
13540 UNSPEC_PUSH_MULT));
13542 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13543 plus_constant (stack_pointer_rtx, -(count * 8)));
13544 RTX_FRAME_RELATED_P (tmp) = 1;
13545 XVECEXP (dwarf, 0, 0) = tmp;
13547 tmp = gen_rtx_SET (VOIDmode,
13548 gen_frame_mem (DFmode, stack_pointer_rtx),
13550 RTX_FRAME_RELATED_P (tmp) = 1;
13551 XVECEXP (dwarf, 0, 1) = tmp;
13553 for (i = 1; i < count; i++)
13555 reg = gen_rtx_REG (DFmode, base_reg);
13557 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13559 tmp = gen_rtx_SET (VOIDmode,
13560 gen_frame_mem (DFmode,
13561 plus_constant (stack_pointer_rtx,
13564 RTX_FRAME_RELATED_P (tmp) = 1;
13565 XVECEXP (dwarf, 0, i + 1) = tmp;
13568 par = emit_insn (par);
13569 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13570 RTX_FRAME_RELATED_P (par) = 1;
13575 /* Emit a call instruction with pattern PAT. ADDR is the address of
13576 the call target. */
13579 arm_emit_call_insn (rtx pat, rtx addr)
13583 insn = emit_call_insn (pat);
13585 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13586 If the call might use such an entry, add a use of the PIC register
13587 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13588 if (TARGET_VXWORKS_RTP
13590 && GET_CODE (addr) == SYMBOL_REF
13591 && (SYMBOL_REF_DECL (addr)
13592 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13593 : !SYMBOL_REF_LOCAL_P (addr)))
13595 require_pic_register ();
13596 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13600 /* Output a 'call' insn. */
13602 output_call (rtx *operands)
13604 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13606 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13607 if (REGNO (operands[0]) == LR_REGNUM)
13609 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13610 output_asm_insn ("mov%?\t%0, %|lr", operands);
13613 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13615 if (TARGET_INTERWORK || arm_arch4t)
13616 output_asm_insn ("bx%?\t%0", operands);
13618 output_asm_insn ("mov%?\t%|pc, %0", operands);
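/* Illustrative output for a pre-ARMv5 indirect call through r2 (a
   sketch): in ARM state the PC reads as the current insn + 8, so

       mov     lr, pc          @ LR := address of the insn after the bx
       bx      r2              @ or "mov pc, r2" without v4t/interworking

   leaves the correct return address in LR.  */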
13623 /* Output a 'call' insn that is a reference in memory. This is
13624 disabled for ARMv5 and we prefer a blx instead because otherwise
13625 there's a significant performance overhead. */
13627 output_call_mem (rtx *operands)
13629 gcc_assert (!arm_arch5);
13630 if (TARGET_INTERWORK)
13632 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13633 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13634 output_asm_insn ("bx%?\t%|ip", operands);
13636 else if (regno_use_in (LR_REGNUM, operands[0]))
13638 /* LR is used in the memory address. We load the address in the
13639 first instruction. It's safe to use IP as the target of the
13640 load since the call will kill it anyway. */
13641 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13642 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13644 output_asm_insn ("bx%?\t%|ip", operands);
13646 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13650 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13651 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13658 /* Output a long double move from ARM registers to an FPA register.
13659 OPERANDS[0] is an FPA register.
13660 OPERANDS[1] is the first of the three consecutive ARM registers holding the value. */
13662 output_mov_long_double_fpa_from_arm (rtx *operands)
13664 int arm_reg0 = REGNO (operands[1]);
13667 gcc_assert (arm_reg0 != IP_REGNUM);
13669 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13670 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13671 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13673 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13674 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13679 /* Output a long double move from an FPA register to ARM registers.
13680 OPERANDS[0] is the first of the three consecutive ARM registers.
13681 OPERANDS[1] is an FPA register. */
13683 output_mov_long_double_arm_from_fpa (rtx *operands)
13685 int arm_reg0 = REGNO (operands[0]);
13688 gcc_assert (arm_reg0 != IP_REGNUM);
13690 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13691 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13692 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13694 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13695 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13699 /* Output a long double move from ARM registers to ARM registers.
13700 OPERANDS[0] is the destination.
13701 OPERANDS[1] is the source. */
13703 output_mov_long_double_arm_from_arm (rtx *operands)
13705 /* We have to be careful here because the two might overlap. */
13706 int dest_start = REGNO (operands[0]);
13707 int src_start = REGNO (operands[1]);
13711 if (dest_start < src_start)
13713 for (i = 0; i < 3; i++)
13715 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13716 ops[1] = gen_rtx_REG (SImode, src_start + i);
13717 output_asm_insn ("mov%?\t%0, %1", ops);
13722 for (i = 2; i >= 0; i--)
13724 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13725 ops[1] = gen_rtx_REG (SImode, src_start + i);
13726 output_asm_insn ("mov%?\t%0, %1", ops);
13734 arm_emit_movpair (rtx dest, rtx src)
13736 /* If the src is an immediate, simplify it. */
13737 if (CONST_INT_P (src))
13739 HOST_WIDE_INT val = INTVAL (src);
13740 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13741 if ((val >> 16) & 0x0000ffff)
13742 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13744 GEN_INT ((val >> 16) & 0x0000ffff));
13747 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13748 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
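/* For example (illustrative): arm_emit_movpair (r0, GEN_INT (0x12345678))
   emits the equivalent of

       movw    r0, #0x5678     @ SET of the low 16 bits, zero-extended
       movt    r0, #0x1234     @ ZERO_EXTRACT fills the high 16 bits

   while a symbolic SRC is split into the HIGH/LO_SUM pair above.  */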
13751 /* Output a double-precision move from ARM registers to an FPA register.
13752 OPERANDS[0] is an FPA register.
13753 OPERANDS[1] is the first register of an ARM register pair. */
13755 output_mov_double_fpa_from_arm (rtx *operands)
13757 int arm_reg0 = REGNO (operands[1]);
13760 gcc_assert (arm_reg0 != IP_REGNUM);
13762 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13763 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13764 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13765 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13769 /* Output a double-precision move from an FPA register to ARM registers.
13770 OPERANDS[0] is the first register of an ARM register pair.
13771 OPERANDS[1] is an FPA register. */
13773 output_mov_double_arm_from_fpa (rtx *operands)
13775 int arm_reg0 = REGNO (operands[0]);
13778 gcc_assert (arm_reg0 != IP_REGNUM);
13780 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13781 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13782 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13783 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13787 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
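/* For instance (a sketch): with TARGET_LDRD a (reg:DI r0) <- (mem:DI
   (reg:SI r2)) move prints "ldrd r0, [r2]", while targets without
   LDRD fall back to "ldmia r2, {r0, r1}"; most of the cases below
   handle addressing modes and errata for which neither simple form
   is safe.  */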
13790 output_move_double (rtx *operands, bool emit, int *count)
13792 enum rtx_code code0 = GET_CODE (operands[0]);
13793 enum rtx_code code1 = GET_CODE (operands[1]);
13798 /* The only case when this might happen is when
13799 you are looking at the length of a DImode instruction
13800 that has an invalid constant in it. */
13801 if (code0 == REG && code1 != MEM)
13803 gcc_assert (!emit);
13810 unsigned int reg0 = REGNO (operands[0]);
13812 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13814 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13816 switch (GET_CODE (XEXP (operands[1], 0)))
13823 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13824 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13826 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13831 gcc_assert (TARGET_LDRD);
13833 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13840 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13842 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13850 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13852 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13857 gcc_assert (TARGET_LDRD);
13859 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13864 /* Autoincrement addressing modes should never have overlapping
13865 base and destination registers, and overlapping index registers
13866 are already prohibited, so this doesn't need to worry about
13868 otherops[0] = operands[0];
13869 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13870 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13872 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13874 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13876 /* Registers overlap so split out the increment. */
13879 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13880 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13887 /* Use a single insn if we can.
13888 FIXME: IWMMXT allows offsets larger than ldrd can
13889 handle, fix these up with a pair of ldr. */
13891 || GET_CODE (otherops[2]) != CONST_INT
13892 || (INTVAL (otherops[2]) > -256
13893 && INTVAL (otherops[2]) < 256))
13896 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13902 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13903 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13913 /* Use a single insn if we can.
13914 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13915 fix these up with a pair of ldr. */
13917 || GET_CODE (otherops[2]) != CONST_INT
13918 || (INTVAL (otherops[2]) > -256
13919 && INTVAL (otherops[2]) < 256))
13922 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13928 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13929 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13939 /* We might be able to use ldrd %0, %1 here. However the range is
13940 different to ldr/adr, and it is broken on some ARMv7-M
13941 implementations. */
13942 /* Use the second register of the pair to avoid problematic
13944 otherops[1] = operands[1];
13946 output_asm_insn ("adr%?\t%0, %1", otherops);
13947 operands[1] = otherops[0];
13951 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13953 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13960 /* ??? This needs checking for thumb2. */
13962 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13963 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13965 otherops[0] = operands[0];
13966 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13967 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13969 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13971 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13973 switch ((int) INTVAL (otherops[2]))
13977 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13983 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13989 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13993 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13994 operands[1] = otherops[0];
13996 && (GET_CODE (otherops[2]) == REG
13998 || (GET_CODE (otherops[2]) == CONST_INT
13999 && INTVAL (otherops[2]) > -256
14000 && INTVAL (otherops[2]) < 256)))
14002 if (reg_overlap_mentioned_p (operands[0],
14006 /* Swap base and index registers over to
14007 avoid a conflict. */
14009 otherops[1] = otherops[2];
14012 /* If both registers conflict, it will usually
14013 have been fixed by a splitter. */
14014 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14015 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14019 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14020 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14027 otherops[0] = operands[0];
14029 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14034 if (GET_CODE (otherops[2]) == CONST_INT)
14038 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14039 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14041 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14047 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14053 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14057 return "ldr%(d%)\t%0, [%1]";
14059 return "ldm%(ia%)\t%1, %M0";
14063 otherops[1] = adjust_address (operands[1], SImode, 4);
14064 /* Take care of overlapping base/data reg. */
14065 if (reg_mentioned_p (operands[0], operands[1]))
14069 output_asm_insn ("ldr%?\t%0, %1", otherops);
14070 output_asm_insn ("ldr%?\t%0, %1", operands);
14080 output_asm_insn ("ldr%?\t%0, %1", operands);
14081 output_asm_insn ("ldr%?\t%0, %1", otherops);
14091 /* Constraints should ensure this. */
14092 gcc_assert (code0 == MEM && code1 == REG);
14093 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14095 switch (GET_CODE (XEXP (operands[0], 0)))
14101 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14103 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14108 gcc_assert (TARGET_LDRD);
14110 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14117 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14119 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14127 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14129 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14134 gcc_assert (TARGET_LDRD);
14136 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14141 otherops[0] = operands[1];
14142 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14143 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14145 /* IWMMXT allows offsets larger than ldrd can handle,
14146 fix these up with a pair of ldr. */
14148 && GET_CODE (otherops[2]) == CONST_INT
14149 && (INTVAL(otherops[2]) <= -256
14150 || INTVAL(otherops[2]) >= 256))
14152 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14156 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14157 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14166 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14167 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14173 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14176 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14181 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14186 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14187 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14189 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14193 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14200 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14207 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14212 && (GET_CODE (otherops[2]) == REG
14214 || (GET_CODE (otherops[2]) == CONST_INT
14215 && INTVAL (otherops[2]) > -256
14216 && INTVAL (otherops[2]) < 256)))
14218 otherops[0] = operands[1];
14219 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14221 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14227 otherops[0] = adjust_address (operands[0], SImode, 4);
14228 otherops[1] = operands[1];
14231 output_asm_insn ("str%?\t%1, %0", operands);
14232 output_asm_insn ("str%?\t%H1, %0", otherops);
14242 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14243 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14246 output_move_quad (rtx *operands)
14248 if (REG_P (operands[0]))
14250 /* Load, or reg->reg move. */
14252 if (MEM_P (operands[1]))
14254 switch (GET_CODE (XEXP (operands[1], 0)))
14257 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14262 output_asm_insn ("adr%?\t%0, %1", operands);
14263 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14267 gcc_unreachable ();
14275 gcc_assert (REG_P (operands[1]));
14277 dest = REGNO (operands[0]);
14278 src = REGNO (operands[1]);
14280 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
14283 for (i = 0; i < 4; i++)
14285 ops[0] = gen_rtx_REG (SImode, dest + i);
14286 ops[1] = gen_rtx_REG (SImode, src + i);
14287 output_asm_insn ("mov%?\t%0, %1", ops);
14290 for (i = 3; i >= 0; i--)
14292 ops[0] = gen_rtx_REG (SImode, dest + i);
14293 ops[1] = gen_rtx_REG (SImode, src + i);
14294 output_asm_insn ("mov%?\t%0, %1", ops);
14300 gcc_assert (MEM_P (operands[0]));
14301 gcc_assert (REG_P (operands[1]));
14302 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14304 switch (GET_CODE (XEXP (operands[0], 0)))
14307 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14311 gcc_unreachable ();
14318 /* Output a VFP load or store instruction. */
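/* For example (a sketch): an SFmode load prints "flds s0, [r0]", a
   DFmode load "fldd d0, [r0]", stores swap "ld" for "st", and
   auto-increment addresses use the load/store-multiple forms built
   below; integer modes get a trailing "@ int" comment.  */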
14321 output_move_vfp (rtx *operands)
14323 rtx reg, mem, addr, ops[2];
14324 int load = REG_P (operands[0]);
14325 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14326 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14329 enum machine_mode mode;
14331 reg = operands[!load];
14332 mem = operands[load];
14334 mode = GET_MODE (reg);
14336 gcc_assert (REG_P (reg));
14337 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14338 gcc_assert (mode == SFmode
14342 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14343 gcc_assert (MEM_P (mem));
14345 addr = XEXP (mem, 0);
14347 switch (GET_CODE (addr))
14350 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14351 ops[0] = XEXP (addr, 0);
14356 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14357 ops[0] = XEXP (addr, 0);
14362 templ = "f%s%c%%?\t%%%s0, %%1%s";
14368 sprintf (buff, templ,
14369 load ? "ld" : "st",
14372 integer_p ? "\t%@ int" : "");
14373 output_asm_insn (buff, ops);
14378 /* Output a Neon quad-word load or store, or a load or store for
14379 larger structure modes.
14381 WARNING: The ordering of elements is weird in big-endian mode,
14382 because we use VSTM, as required by the EABI. GCC RTL defines
14383 element ordering based on in-memory order. This can differ
14384 from the architectural ordering of elements within a NEON register.
14385 The intrinsics defined in arm_neon.h use the NEON register element
14386 ordering, not the GCC RTL element ordering.
14388 For example, the in-memory ordering of a big-endian quadword
14389 vector with 16-bit elements when stored from register pair {d0,d1}
14390 will be (lowest address first, d0[N] is NEON register element N):
14392 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14394 When necessary, quadword registers (dN, dN+1) are moved to ARM
14395 registers from rN in the order:
14397 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14399 So that STM/LDM can be used on vectors in ARM registers, and the
14400 same memory layout will result as if VSTM/VLDM were used. */
14403 output_move_neon (rtx *operands)
14405 rtx reg, mem, addr, ops[2];
14406 int regno, load = REG_P (operands[0]);
14409 enum machine_mode mode;
14411 reg = operands[!load];
14412 mem = operands[load];
14414 mode = GET_MODE (reg);
14416 gcc_assert (REG_P (reg));
14417 regno = REGNO (reg);
14418 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14419 || NEON_REGNO_OK_FOR_QUAD (regno));
14420 gcc_assert (VALID_NEON_DREG_MODE (mode)
14421 || VALID_NEON_QREG_MODE (mode)
14422 || VALID_NEON_STRUCT_MODE (mode));
14423 gcc_assert (MEM_P (mem));
14425 addr = XEXP (mem, 0);
14427 /* Strip off const from addresses like (const (plus (...))). */
14428 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14429 addr = XEXP (addr, 0);
14431 switch (GET_CODE (addr))
14434 templ = "v%smia%%?\t%%0!, %%h1";
14435 ops[0] = XEXP (addr, 0);
14440 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14441 templ = "v%smdb%%?\t%%0!, %%h1";
14442 ops[0] = XEXP (addr, 0);
14447 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14448 gcc_unreachable ();
14453 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14456 for (i = 0; i < nregs; i++)
14458 /* We're only using DImode here because it's a convenient size. */
14459 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14460 ops[1] = adjust_address (mem, DImode, 8 * i);
14461 if (reg_overlap_mentioned_p (ops[0], mem))
14463 gcc_assert (overlap == -1);
14468 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14469 output_asm_insn (buff, ops);
14474 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14475 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14476 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14477 output_asm_insn (buff, ops);
14484 templ = "v%smia%%?\t%%m0, %%h1";
14489 sprintf (buff, templ, load ? "ld" : "st");
14490 output_asm_insn (buff, ops);
14495 /* Compute and return the length of neon_mov<mode>, where <mode> is
14496 one of VSTRUCT modes: EI, OI, CI or XI. */
14498 arm_attr_length_move_neon (rtx insn)
14500 rtx reg, mem, addr;
14502 enum machine_mode mode;
14504 extract_insn_cached (insn);
14506 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14508 mode = GET_MODE (recog_data.operand[0]);
14519 gcc_unreachable ();
14523 load = REG_P (recog_data.operand[0]);
14524 reg = recog_data.operand[!load];
14525 mem = recog_data.operand[load];
14527 gcc_assert (MEM_P (mem));
14529 mode = GET_MODE (reg);
14530 addr = XEXP (mem, 0);
14532 /* Strip off const from addresses like (const (plus (...))). */
14533 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14534 addr = XEXP (addr, 0);
14536 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14538 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14545 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
14549 arm_address_offset_is_imm (rtx insn)
14553 extract_insn_cached (insn);
14555 if (REG_P (recog_data.operand[0]))
14558 mem = recog_data.operand[0];
14560 gcc_assert (MEM_P (mem));
14562 addr = XEXP (mem, 0);
14564 if (GET_CODE (addr) == REG
14565 || (GET_CODE (addr) == PLUS
14566 && GET_CODE (XEXP (addr, 0)) == REG
14567 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14573 /* Output an ADD r, s, #n where n may be too big for one instruction.
14574 If adding zero to one register, output nothing. */
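/* For example (a sketch): ARM data-processing immediates are 8-bit
   values rotated right by an even amount, so adding #0x10004 cannot
   be done in one instruction; output_multi_immediate below splits it
   into two valid chunks:

       add     r0, r1, #4
       add     r0, r0, #65536  */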
14576 output_add_immediate (rtx *operands)
14578 HOST_WIDE_INT n = INTVAL (operands[2]);
14580 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14583 output_multi_immediate (operands,
14584 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14587 output_multi_immediate (operands,
14588 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14595 /* Output a multiple immediate operation.
14596 OPERANDS is the vector of operands referred to in the output patterns.
14597 INSTR1 is the output pattern to use for the first constant.
14598 INSTR2 is the output pattern to use for subsequent constants.
14599 IMMED_OP is the index of the constant slot in OPERANDS.
14600 N is the constant value. */
14601 static const char *
14602 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14603 int immed_op, HOST_WIDE_INT n)
14605 #if HOST_BITS_PER_WIDE_INT > 32
14611 /* Quick and easy output. */
14612 operands[immed_op] = const0_rtx;
14613 output_asm_insn (instr1, operands);
14618 const char * instr = instr1;
14620 /* Note that n is never zero here (which would give no output). */
14621 for (i = 0; i < 32; i += 2)
14625 operands[immed_op] = GEN_INT (n & (255 << i));
14626 output_asm_insn (instr, operands);
14636 /* Return the name of a shifter operation. */
14637 static const char *
14638 arm_shift_nmem(enum rtx_code code)
14643 return ARM_LSL_NAME;
14659 /* Return the appropriate ARM instruction for the operation code.
14660 The returned result should not be overwritten. OP is the rtx of the
14661 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator is a shift. */
14664 arithmetic_instr (rtx op, int shift_first_arg)
14666 switch (GET_CODE (op))
14672 return shift_first_arg ? "rsb" : "sub";
14687 return arm_shift_nmem(GET_CODE(op));
14690 gcc_unreachable ();
14694 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14695 for the operation code. The returned result should not be overwritten.
14696 OP is the rtx code of the shift.
14697 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift amount otherwise. */
14699 static const char *
14700 shift_op (rtx op, HOST_WIDE_INT *amountp)
14703 enum rtx_code code = GET_CODE (op);
14705 switch (GET_CODE (XEXP (op, 1)))
14713 *amountp = INTVAL (XEXP (op, 1));
14717 gcc_unreachable ();
14723 gcc_assert (*amountp != -1);
14724 *amountp = 32 - *amountp;
14727 /* Fall through. */
14733 mnem = arm_shift_nmem(code);
14737 /* We never have to worry about the amount being other than a
14738 power of 2, since this case can never be reloaded from a reg. */
14739 gcc_assert (*amountp != -1);
14740 *amountp = int_log2 (*amountp);
14741 return ARM_LSL_NAME;
14744 gcc_unreachable ();
14747 if (*amountp != -1)
14749 /* This is not 100% correct, but follows from the desire to merge
14750 multiplication by a power of 2 with the recognizer for a
14751 shift. >=32 is not a valid shift for "lsl", so we must try and
14752 output a shift that produces the correct arithmetical result.
14753 Using lsr #32 is identical except for the fact that the carry bit
14754 is not set correctly if we set the flags; but we never use the
14755 carry bit from such an operation, so we can ignore that. */
14756 if (code == ROTATERT)
14757 /* Rotate is just modulo 32. */
14759 else if (*amountp != (*amountp & 31))
14761 if (code == ASHIFT)
14766 /* Shifts of 0 are no-ops. */
14774 /* Obtain the shift from the POWER of two. */
14776 static HOST_WIDE_INT
14777 int_log2 (HOST_WIDE_INT power)
14779 HOST_WIDE_INT shift = 0;
14781 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14783 gcc_assert (shift <= 31);
14790 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14791 because /bin/as is horribly restrictive. The judgement about
14792 whether or not each character is 'printable' (and can be output as
14793 is) or not (and must be printed with an octal escape) must be made
14794 with reference to the *host* character set -- the situation is
14795 similar to that discussed in the comments above pp_c_char in
14796 c-pretty-print.c. */
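/* For example (illustrative): output_ascii_pseudo_op (f, "a\"b\n", 4)
   emits

       .ascii "a\"b\012"

   restarting the directive each time MAX_ASCII_LEN characters have
   been printed on one line.  */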
14798 #define MAX_ASCII_LEN 51
14801 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14804 int len_so_far = 0;
14806 fputs ("\t.ascii\t\"", stream);
14808 for (i = 0; i < len; i++)
14812 if (len_so_far >= MAX_ASCII_LEN)
14814 fputs ("\"\n\t.ascii\t\"", stream);
14820 if (c == '\\' || c == '\"')
14822 putc ('\\', stream);
14830 fprintf (stream, "\\%03o", c);
14835 fputs ("\"\n", stream);
14838 /* Compute the register save mask for registers 0 through 12
14839 inclusive. This code is used by arm_compute_save_reg_mask. */
14841 static unsigned long
14842 arm_compute_save_reg0_reg12_mask (void)
14844 unsigned long func_type = arm_current_func_type ();
14845 unsigned long save_reg_mask = 0;
14848 if (IS_INTERRUPT (func_type))
14850 unsigned int max_reg;
14851 /* Interrupt functions must not corrupt any registers,
14852 even call clobbered ones. If this is a leaf function
14853 we can just examine the registers used by the RTL, but
14854 otherwise we have to assume that whatever function is
14855 called might clobber anything, and so we have to save
14856 all the call-clobbered registers as well. */
14857 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14858 /* FIQ handlers have registers r8 - r12 banked, so
14859 we only need to check r0 - r7. Normal ISRs only
14860 bank r14 and r15, so we must check up to r12.
14861 r13 is the stack pointer which is always preserved,
14862 so we do not need to consider it here. */
14867 for (reg = 0; reg <= max_reg; reg++)
14868 if (df_regs_ever_live_p (reg)
14869 || (! current_function_is_leaf && call_used_regs[reg]))
14870 save_reg_mask |= (1 << reg);
14872 /* Also save the pic base register if necessary. */
14874 && !TARGET_SINGLE_PIC_BASE
14875 && arm_pic_register != INVALID_REGNUM
14876 && crtl->uses_pic_offset_table)
14877 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14879 else if (IS_VOLATILE(func_type))
14881 /* For noreturn functions we historically omitted register saves
14882 altogether. However this really messes up debugging. As a
14883 compromise save just the frame pointers. Combined with the link
14884 register saved elsewhere this should be sufficient to get a backtrace. */
14886 if (frame_pointer_needed)
14887 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14888 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14889 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14890 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14891 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14895 /* In the normal case we only need to save those registers
14896 which are call saved and which are used by this function. */
14897 for (reg = 0; reg <= 11; reg++)
14898 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14899 save_reg_mask |= (1 << reg);
14901 /* Handle the frame pointer as a special case. */
14902 if (frame_pointer_needed)
14903 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14905 /* If we aren't loading the PIC register,
14906 don't stack it even though it may be live. */
14908 && !TARGET_SINGLE_PIC_BASE
14909 && arm_pic_register != INVALID_REGNUM
14910 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14911 || crtl->uses_pic_offset_table))
14912 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14914 /* The prologue will copy SP into R0, so save it. */
14915 if (IS_STACKALIGN (func_type))
14916 save_reg_mask |= 1;
14919 /* Save registers so the exception handler can modify them. */
14920 if (crtl->calls_eh_return)
14926 reg = EH_RETURN_DATA_REGNO (i);
14927 if (reg == INVALID_REGNUM)
14929 save_reg_mask |= 1 << reg;
14933 return save_reg_mask;
14937 /* Compute the number of bytes used to store the static chain register on the
14938 stack, above the stack frame. We need to know this accurately to get the
14939 alignment of the rest of the stack frame correct. */
14941 static int arm_compute_static_chain_stack_bytes (void)
14943 unsigned long func_type = arm_current_func_type ();
14944 int static_chain_stack_bytes = 0;
14946 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
14947 && IS_NESTED (func_type)
14948 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14949 static_chain_stack_bytes = 4;
14951 return static_chain_stack_bytes;
14955 /* Compute a bit mask of which registers need to be
14956 saved on the stack for the current function.
14957 This is used by arm_get_frame_offsets, which may add extra registers. */
14959 static unsigned long
14960 arm_compute_save_reg_mask (void)
14962 unsigned int save_reg_mask = 0;
14963 unsigned long func_type = arm_current_func_type ();
14966 if (IS_NAKED (func_type))
14967 /* This should never really happen. */
14970 /* If we are creating a stack frame, then we must save the frame pointer,
14971 IP (which will hold the old stack pointer), LR and the PC. */
14972 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14974 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14977 | (1 << PC_REGNUM);
14979 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14981 /* Decide if we need to save the link register.
14982 Interrupt routines have their own banked link register,
14983 so they never need to save it.
14984 Otherwise if we do not use the link register we do not need to save
14985 it. If we are pushing other registers onto the stack however, we
14986 can save an instruction in the epilogue by pushing the link register
14987 now and then popping it back into the PC. This incurs extra memory
14988 accesses though, so we only do it when optimizing for size, and only
14989 if we know that we will not need a fancy return sequence. */
14990 if (df_regs_ever_live_p (LR_REGNUM)
14993 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14994 && !crtl->calls_eh_return))
14995 save_reg_mask |= 1 << LR_REGNUM;
14997 if (cfun->machine->lr_save_eliminated)
14998 save_reg_mask &= ~ (1 << LR_REGNUM);
15000 if (TARGET_REALLY_IWMMXT
15001 && ((bit_count (save_reg_mask)
15002 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15003 arm_compute_static_chain_stack_bytes())
15006 /* The total number of registers that are going to be pushed
15007 onto the stack is odd. We need to ensure that the stack
15008 is 64-bit aligned before we start to save iWMMXt registers,
15009 and also before we start to create locals. (A local variable
15010 might be a double or long long which we will load/store using
15011 an iWMMXt instruction). Therefore we need to push another
15012 ARM register, so that the stack will be 64-bit aligned. We
15013 try to avoid using the arg registers (r0 - r3) as they might be
15014 used to pass values in a tail call. */
15015 for (reg = 4; reg <= 12; reg++)
15016 if ((save_reg_mask & (1 << reg)) == 0)
15020 save_reg_mask |= (1 << reg);
15023 cfun->machine->sibcall_blocked = 1;
15024 save_reg_mask |= (1 << 3);
15028 /* We may need to push an additional register for use initializing the
15029 PIC base register. */
15030 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15031 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15033 reg = thumb_find_work_register (1 << 4);
15034 if (!call_used_regs[reg])
15035 save_reg_mask |= (1 << reg);
15038 return save_reg_mask;
15042 /* Compute a bit mask of which registers need to be
15043 saved on the stack for the current function. */
15044 static unsigned long
15045 thumb1_compute_save_reg_mask (void)
15047 unsigned long mask;
15051 for (reg = 0; reg < 12; reg ++)
15052 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15056 && !TARGET_SINGLE_PIC_BASE
15057 && arm_pic_register != INVALID_REGNUM
15058 && crtl->uses_pic_offset_table)
15059 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15061 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15062 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15063 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15065 /* LR will also be pushed if any lo regs are pushed. */
15066 if (mask & 0xff || thumb_force_lr_save ())
15067 mask |= (1 << LR_REGNUM);
15069 /* Make sure we have a low work register if we need one.
15070 We will need one if we are going to push a high register,
15071 but we are not currently intending to push a low register. */
15072 if ((mask & 0xff) == 0
15073 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15075 /* Use thumb_find_work_register to choose which register
15076 we will use. If the register is live then we will
15077 have to push it. Use LAST_LO_REGNUM as our fallback
15078 choice for the register to select. */
15079 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15080 /* Make sure the register returned by thumb_find_work_register is
15081 not part of the return value. */
15082 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15083 reg = LAST_LO_REGNUM;
15085 if (! call_used_regs[reg])
15089 /* The 504 below is 8 bytes less than 512 because there are two possible
15090 alignment words. We can't tell here if they will be present or not so we
15091 have to play it safe and assume that they are. */
15092 if ((CALLER_INTERWORKING_SLOT_SIZE +
15093 ROUND_UP_WORD (get_frame_size ()) +
15094 crtl->outgoing_args_size) >= 504)
15096 /* This is the same as the code in thumb1_expand_prologue() which
15097 determines which register to use for stack decrement. */
15098 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15099 if (mask & (1 << reg))
15102 if (reg > LAST_LO_REGNUM)
15104 /* Make sure we have a register available for stack decrement. */
15105 mask |= 1 << LAST_LO_REGNUM;
15113 /* Return the number of bytes required to save VFP registers. */
15115 arm_get_vfp_saved_size (void)
15117 unsigned int regno;
15122 /* Space for saved VFP registers. */
15123 if (TARGET_HARD_FLOAT && TARGET_VFP)
15126 for (regno = FIRST_VFP_REGNUM;
15127 regno < LAST_VFP_REGNUM;
15130 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15131 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15135 /* Workaround ARM10 VFPr1 bug. */
15136 if (count == 2 && !arm_arch6)
15138 saved += count * 8;
15147 if (count == 2 && !arm_arch6)
15149 saved += count * 8;
15156 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15157 everything bar the final return instruction. */
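/* For example (a sketch): a leaf function with no saved registers
   returns with "bx lr" (or "mov pc, lr" before v4t), while a function
   that pushed {r4, lr} restores both and returns in one instruction,
   "ldmfd sp!, {r4, pc}", when no special exit sequence is needed.  */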
15159 output_return_instruction (rtx operand, int really_return, int reverse)
15161 char conditional[10];
15164 unsigned long live_regs_mask;
15165 unsigned long func_type;
15166 arm_stack_offsets *offsets;
15168 func_type = arm_current_func_type ();
15170 if (IS_NAKED (func_type))
15173 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15175 /* If this function was declared non-returning, and we have
15176 found a tail call, then we have to trust that the called
15177 function won't return. */
15182 /* Otherwise, trap an attempted return by aborting. */
15184 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15186 assemble_external_libcall (ops[1]);
15187 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15193 gcc_assert (!cfun->calls_alloca || really_return);
15195 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15197 cfun->machine->return_used_this_function = 1;
15199 offsets = arm_get_frame_offsets ();
15200 live_regs_mask = offsets->saved_regs_mask;
15202 if (live_regs_mask)
15204 const char * return_reg;
15206 /* If we do not have any special requirements for function exit
15207 (e.g. interworking) then we can load the return address
15208 directly into the PC. Otherwise we must load it into LR. */
15210 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15211 return_reg = reg_names[PC_REGNUM];
15213 return_reg = reg_names[LR_REGNUM];
15215 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15217 /* There are three possible reasons for the IP register
15218 being saved. 1) a stack frame was created, in which case
15219 IP contains the old stack pointer, or 2) an ISR routine
15220 corrupted it, or 3) it was saved to align the stack on
15221 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
15223 if (frame_pointer_needed)
15225 live_regs_mask &= ~ (1 << IP_REGNUM);
15226 live_regs_mask |= (1 << SP_REGNUM);
15229 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15232 /* On some ARM architectures it is faster to use LDR rather than
15233 LDM to load a single register. On other architectures, the
15234 cost is the same. In 26 bit mode, or for exception handlers,
15235 we have to use LDM to load the PC so that the CPSR is also restored. */
15237 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15238 if (live_regs_mask == (1U << reg))
15241 if (reg <= LAST_ARM_REGNUM
15242 && (reg != LR_REGNUM
15244 || ! IS_INTERRUPT (func_type)))
15246 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15247 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15254 /* Generate the load multiple instruction to restore the
15255 registers. Note we can get here, even if
15256 frame_pointer_needed is true, but only if sp already
15257 points to the base of the saved core registers. */
15258 if (live_regs_mask & (1 << SP_REGNUM))
15260 unsigned HOST_WIDE_INT stack_adjust;
15262 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15263 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15265 if (stack_adjust && arm_arch5 && TARGET_ARM)
15266 if (TARGET_UNIFIED_ASM)
15267 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15269 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15272 /* If we can't use ldmib (SA110 bug),
15273 then try to pop r3 instead. */
15275 live_regs_mask |= 1 << 3;
15277 if (TARGET_UNIFIED_ASM)
15278 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15280 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15284 if (TARGET_UNIFIED_ASM)
15285 sprintf (instr, "pop%s\t{", conditional);
15287 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15289 p = instr + strlen (instr);
15291 for (reg = 0; reg <= SP_REGNUM; reg++)
15292 if (live_regs_mask & (1 << reg))
15294 int l = strlen (reg_names[reg]);
15300 memcpy (p, ", ", 2);
15304 memcpy (p, "%|", 2);
15305 memcpy (p + 2, reg_names[reg], l);
15309 if (live_regs_mask & (1 << LR_REGNUM))
15311 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15312 /* If returning from an interrupt, restore the CPSR. */
15313 if (IS_INTERRUPT (func_type))
15320 output_asm_insn (instr, & operand);
15322 /* See if we need to generate an extra instruction to
15323 perform the actual function return. */
15325 && func_type != ARM_FT_INTERWORKED
15326 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15328 /* The return has already been handled
15329 by loading the LR into the PC. */
15336 switch ((int) ARM_FUNC_TYPE (func_type))
15340 /* ??? This is wrong for unified assembly syntax. */
15341 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15344 case ARM_FT_INTERWORKED:
15345 sprintf (instr, "bx%s\t%%|lr", conditional);
15348 case ARM_FT_EXCEPTION:
15349 /* ??? This is wrong for unified assembly syntax. */
15350 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15354 /* Use bx if it's available. */
15355 if (arm_arch5 || arm_arch4t)
15356 sprintf (instr, "bx%s\t%%|lr", conditional);
15358 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15362 output_asm_insn (instr, & operand);
15368 /* Write the function name into the code section, directly preceding
15369 the function prologue.
15371 Code will be output similar to this:
15373 .ascii "arm_poke_function_name", 0
15376 .word 0xff000000 + (t1 - t0)
15377 arm_poke_function_name
15379 stmfd sp!, {fp, ip, lr, pc}
15382 When performing a stack backtrace, code can inspect the value
15383 of 'pc' stored at 'fp' + 0. If the trace function then looks
15384 at location pc - 12 and the top 8 bits are set, then we know
15385 that there is a function name embedded immediately preceding this
15386 location, whose length is ((pc[-3]) & 0x00ffffff).
15388 We assume that pc is declared as a pointer to an unsigned long.
15390 It is of no benefit to output the function name if we are assembling
15391 a leaf function. These function types will not contain a stack
15392 backtrace structure, therefore it is not possible to determine the function name. */
15395 arm_poke_function_name (FILE *stream, const char *name)
15397 unsigned long alignlength;
15398 unsigned long length;
15401 length = strlen (name) + 1;
15402 alignlength = ROUND_UP_WORD (length);
15404 ASM_OUTPUT_ASCII (stream, name, length);
15405 ASM_OUTPUT_ALIGN (stream, 2);
15406 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15407 assemble_aligned_integer (UNITS_PER_WORD, x);
15410 /* Place some comments into the assembler stream
15411 describing the current function. */
15413 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15415 unsigned long func_type;
15417 /* ??? Do we want to print some of the below anyway? */
15421 /* Sanity check. */
15422 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15424 func_type = arm_current_func_type ();
15426 switch ((int) ARM_FUNC_TYPE (func_type))
15429 case ARM_FT_NORMAL:
15431 case ARM_FT_INTERWORKED:
15432 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15435 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15438 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15440 case ARM_FT_EXCEPTION:
15441 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15445 if (IS_NAKED (func_type))
15446 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15448 if (IS_VOLATILE (func_type))
15449 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15451 if (IS_NESTED (func_type))
15452 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15453 if (IS_STACKALIGN (func_type))
15454 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15456 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15458 crtl->args.pretend_args_size, frame_size);
15460 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15461 frame_pointer_needed,
15462 cfun->machine->uses_anonymous_args);
15464 if (cfun->machine->lr_save_eliminated)
15465 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15467 if (crtl->calls_eh_return)
15468 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15473 arm_output_epilogue (rtx sibling)
15476 unsigned long saved_regs_mask;
15477 unsigned long func_type;
15478 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15479 frame that is $fp + 4 for a non-variadic function. */
15480 int floats_offset = 0;
15482 FILE * f = asm_out_file;
15483 unsigned int lrm_count = 0;
15484 int really_return = (sibling == NULL);
15486 arm_stack_offsets *offsets;
15488 /* If we have already generated the return instruction
15489 then it is futile to generate anything else. */
15490 if (use_return_insn (FALSE, sibling)
15491 && (cfun->machine->return_used_this_function != 0))
15494 func_type = arm_current_func_type ();
15496 if (IS_NAKED (func_type))
15497 /* Naked functions don't have epilogues. */
15500 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15504 /* A volatile function should never return. Call abort. */
15505 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15506 assemble_external_libcall (op);
15507 output_asm_insn ("bl\t%a0", &op);
15512 /* If we are throwing an exception, then we really must be doing a
15513 return, so we can't tail-call. */
15514 gcc_assert (!crtl->calls_eh_return || really_return);
15516 offsets = arm_get_frame_offsets ();
15517 saved_regs_mask = offsets->saved_regs_mask;
15520 lrm_count = bit_count (saved_regs_mask);
15522 floats_offset = offsets->saved_args;
15523 /* Compute how far away the floats will be. */
15524 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15525 if (saved_regs_mask & (1 << reg))
15526 floats_offset += 4;
15528 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15530 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15531 int vfp_offset = offsets->frame;
15533 if (TARGET_FPA_EMU2)
15535 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15536 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15538 floats_offset += 12;
15539 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15540 reg, FP_REGNUM, floats_offset - vfp_offset);
15545 start_reg = LAST_FPA_REGNUM;
15547 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15549 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15551 floats_offset += 12;
15553 /* We can't unstack more than four registers at once. */
15554 if (start_reg - reg == 3)
15556 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15557 reg, FP_REGNUM, floats_offset - vfp_offset);
15558 start_reg = reg - 1;
15563 if (reg != start_reg)
15564 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15565 reg + 1, start_reg - reg,
15566 FP_REGNUM, floats_offset - vfp_offset);
15567 start_reg = reg - 1;
15571 /* Just in case the last register checked also needs unstacking. */
15572 if (reg != start_reg)
15573 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15574 reg + 1, start_reg - reg,
15575 FP_REGNUM, floats_offset - vfp_offset);
15578 if (TARGET_HARD_FLOAT && TARGET_VFP)
15582 /* The fldmd insns do not have base+offset addressing
15583 modes, so we use IP to hold the address. */
15584 saved_size = arm_get_vfp_saved_size ();
15586 if (saved_size > 0)
15588 floats_offset += saved_size;
15589 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15590 FP_REGNUM, floats_offset - vfp_offset);
15592 start_reg = FIRST_VFP_REGNUM;
15593 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15595 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15596 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15598 if (start_reg != reg)
15599 vfp_output_fldmd (f, IP_REGNUM,
15600 (start_reg - FIRST_VFP_REGNUM) / 2,
15601 (reg - start_reg) / 2);
15602 start_reg = reg + 2;
15605 if (start_reg != reg)
15606 vfp_output_fldmd (f, IP_REGNUM,
15607 (start_reg - FIRST_VFP_REGNUM) / 2,
15608 (reg - start_reg) / 2);
15613 /* The frame pointer is guaranteed to be non-double-word aligned.
15614 This is because it is set to (old_stack_pointer - 4) and the
15615 old_stack_pointer was double word aligned. Thus the offset to
15616 the iWMMXt registers to be loaded must also be non-double-word
15617 sized, so that the resultant address *is* double-word aligned.
15618 We can ignore floats_offset since that was already included in
15619 the live_regs_mask. */
15620 lrm_count += (lrm_count % 2 ? 2 : 1);
15622 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15623 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15625 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15626 reg, FP_REGNUM, lrm_count * 4);
15631 /* saved_regs_mask should contain the IP, which at the time of stack
15632 frame generation actually contains the old stack pointer. So a
15633 quick way to unwind the stack is just pop the IP register directly
15634 into the stack pointer. */
15635 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15636 saved_regs_mask &= ~ (1 << IP_REGNUM);
15637 saved_regs_mask |= (1 << SP_REGNUM);
15639 /* There are two registers left in saved_regs_mask - LR and PC. We
15640 only need to restore the LR register (the return address), but to
15641 save time we can load it directly into the PC, unless we need a
15642 special function exit sequence, or we are not really returning. */
15644 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15645 && !crtl->calls_eh_return)
15646 /* Delete the LR from the register mask, so that the LR on
15647 the stack is loaded into the PC in the register mask. */
15648 saved_regs_mask &= ~ (1 << LR_REGNUM);
15650 saved_regs_mask &= ~ (1 << PC_REGNUM);
15652 /* We must use SP as the base register, because SP is one of the
15653 registers being restored. If an interrupt or page fault
15654 happens in the ldm instruction, the SP might or might not
15655 have been restored. That would be bad, as then SP will no
15656 longer indicate the safe area of stack, and we can get stack
15657 corruption. Using SP as the base register means that it will
15658 be reset correctly to the original value, should an interrupt
15659 occur. If the stack pointer already points at the right
15660 place, then omit the subtraction. */
15661 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15662 || cfun->calls_alloca)
15663 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15664 4 * bit_count (saved_regs_mask));
15665 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15667 if (IS_INTERRUPT (func_type))
15668 /* Interrupt handlers will have pushed the
15669 IP onto the stack, so restore it now. */
15670 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
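/* Illustrative example (not part of the original source): with
   saved_regs_mask covering {r4, fp, sp, pc} the code above emits
   something like

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   which loads r4 and fp from their save slots, reloads SP from the
   saved old stack pointer, and returns via the saved LR.  Because SP
   itself is in the register list, the base register ends up with its
   saved value even if the ldm is interrupted and restarted.  */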
15674 /* This branch is executed for ARM mode (non-apcs frames) and
15675 Thumb-2 mode. Frame layout is essentially the same for those
15676 cases, except that in ARM mode frame pointer points to the
15677 first saved register, while in Thumb-2 mode the frame pointer points
15678 to the last saved register.
15680 It is possible to make frame pointer point to last saved
15681 register in both cases, and remove some conditionals below.
15682 That means that fp setup in prologue would be just "mov fp, sp"
15683 and sp restore in epilogue would be just "mov sp, fp", whereas
15684 now we have to use add/sub in those cases. However, the value
15685 of that would be marginal, as both mov and add/sub are 32-bit
15686 in ARM mode, and it would require extra conditionals
15687 in arm_expand_prologue to distinguish the ARM-apcs-frame case
15688 (where the frame pointer is required to point at the first register)
15689 from the ARM-non-apcs-frame case. Therefore, such a change is
15690 postponed until a real need arises. */
15691 unsigned HOST_WIDE_INT amount;
15693 /* Restore stack pointer if necessary. */
15694 if (TARGET_ARM && frame_pointer_needed)
15696 operands[0] = stack_pointer_rtx;
15697 operands[1] = hard_frame_pointer_rtx;
15699 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15700 output_add_immediate (operands);
15704 if (frame_pointer_needed)
15706 /* For Thumb-2 restore sp from the frame pointer.
15707 Operand restrictions mean we have to increment FP, then copy
15708 to SP. */
15709 amount = offsets->locals_base - offsets->saved_regs;
15710 operands[0] = hard_frame_pointer_rtx;
15714 unsigned long count;
15715 operands[0] = stack_pointer_rtx;
15716 amount = offsets->outgoing_args - offsets->saved_regs;
15717 /* Pop call-clobbered registers if it avoids a
15718 separate stack adjustment. */
15719 count = offsets->saved_regs - offsets->saved_args;
15722 && !crtl->calls_eh_return
15723 && bit_count(saved_regs_mask) * 4 == count
15724 && !IS_INTERRUPT (func_type)
15725 && !IS_STACKALIGN (func_type)
15726 && !crtl->tail_call_emit)
15728 unsigned long mask;
15729 /* Preserve return values, of any size. */
15730 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15731 mask ^= 0xf;
15732 mask &= ~saved_regs_mask;
15734 while (bit_count (mask) * 4 > amount)
15736 while ((mask & (1 << reg)) == 0)
15738 mask &= ~(1 << reg);
15740 if (bit_count (mask) * 4 == amount) {
15742 saved_regs_mask |= mask;
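/* Illustration (not in the original source): if the remaining
   adjustment is 8 bytes and r2/r3 neither hold the return value nor
   are already saved, folding them in turns
   "add sp, sp, #8; pop {r4, pc}" into a single "pop {r2, r3, r4, pc}";
   the values popped into r2/r3 are simply discarded.  */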
15749 operands[1] = operands[0];
15750 operands[2] = GEN_INT (amount);
15751 output_add_immediate (operands);
15753 if (frame_pointer_needed)
15754 asm_fprintf (f, "\tmov\t%r, %r\n",
15755 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
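/* For example (illustration only, assuming 16 bytes of locals and no
   interworking slot): the sequence produced above is roughly

	add	fp, fp, #16
	mov	sp, fp

   i.e. FP is stepped up past the locals to the saved registers and
   then copied to SP, ready for the pops below.  */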
15758 if (TARGET_FPA_EMU2)
15760 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15761 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15762 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15767 start_reg = FIRST_FPA_REGNUM;
15769 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15771 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15773 if (reg - start_reg == 3)
15775 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15776 start_reg, SP_REGNUM);
15777 start_reg = reg + 1;
15782 if (reg != start_reg)
15783 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15784 start_reg, reg - start_reg,
15787 start_reg = reg + 1;
15791 /* Just in case the last register checked also needs unstacking. */
15792 if (reg != start_reg)
15793 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15794 start_reg, reg - start_reg, SP_REGNUM);
15797 if (TARGET_HARD_FLOAT && TARGET_VFP)
15799 int end_reg = LAST_VFP_REGNUM + 1;
15801 /* Scan the registers in reverse order. We need to match
15802 any groupings made in the prologue and generate matching
15804 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15806 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15807 && (!df_regs_ever_live_p (reg + 1)
15808 || call_used_regs[reg + 1]))
15810 if (end_reg > reg + 2)
15811 vfp_output_fldmd (f, SP_REGNUM,
15812 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15813 (end_reg - (reg + 2)) / 2);
15817 if (end_reg > reg + 2)
15818 vfp_output_fldmd (f, SP_REGNUM, 0,
15819 (end_reg - (reg + 2)) / 2);
15823 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15824 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15825 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15827 /* If we can, restore the LR into the PC. */
15828 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15829 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15830 && !IS_STACKALIGN (func_type)
15831 && really_return
15832 && crtl->args.pretend_args_size == 0
15833 && saved_regs_mask & (1 << LR_REGNUM)
15834 && !crtl->calls_eh_return)
15836 saved_regs_mask &= ~ (1 << LR_REGNUM);
15837 saved_regs_mask |= (1 << PC_REGNUM);
15838 rfe = IS_INTERRUPT (func_type);
15843 /* Load the registers off the stack. If we only have one register
15844 to load use the LDR instruction - it is faster. For Thumb-2
15845 always use pop and the assembler will pick the best instruction. */
15846 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15847 && !IS_INTERRUPT(func_type))
15849 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15851 else if (saved_regs_mask)
15853 if (saved_regs_mask & (1 << SP_REGNUM))
15854 /* Note - write back to the stack register is not enabled
15855 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15856 in the list of registers and if we add writeback the
15857 instruction becomes UNPREDICTABLE. */
15858 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
15860 else if (TARGET_ARM)
15861 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
15864 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15867 if (crtl->args.pretend_args_size)
15869 /* Unwind the pre-pushed regs. */
15870 operands[0] = operands[1] = stack_pointer_rtx;
15871 operands[2] = GEN_INT (crtl->args.pretend_args_size);
15872 output_add_immediate (operands);
15876 /* We may have already restored PC directly from the stack. */
15877 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
15878 return "";
15880 /* Stack adjustment for exception handler. */
15881 if (crtl->calls_eh_return)
15882 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
15883 ARM_EH_STACKADJ_REGNUM);
15885 /* Generate the return instruction. */
15886 switch ((int) ARM_FUNC_TYPE (func_type))
15888 case ARM_FT_ISR:
15889 case ARM_FT_FIQ:
15890 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
15893 case ARM_FT_EXCEPTION:
15894 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15897 case ARM_FT_INTERWORKED:
15898 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15902 if (IS_STACKALIGN (func_type))
15904 /* See comment in arm_expand_prologue. */
15905 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
15907 if (arm_arch5 || arm_arch4t)
15908 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15910 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15918 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15919 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15921 arm_stack_offsets *offsets;
15927 /* Emit any call-via-reg trampolines that are needed for v4t support
15928 of call_reg and call_value_reg type insns. */
15929 for (regno = 0; regno < LR_REGNUM; regno++)
15931 rtx label = cfun->machine->call_via[regno];
15935 switch_to_section (function_section (current_function_decl));
15936 targetm.asm_out.internal_label (asm_out_file, "L",
15937 CODE_LABEL_NUMBER (label));
15938 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15942 /* ??? Probably not safe to set this here, since it assumes that a
15943 function will be emitted as assembly immediately after we generate
15944 RTL for it. This does not happen for inline functions. */
15945 cfun->machine->return_used_this_function = 0;
15947 else /* TARGET_32BIT */
15949 /* We need to take into account any stack-frame rounding. */
15950 offsets = arm_get_frame_offsets ();
15952 gcc_assert (!use_return_insn (FALSE, NULL)
15953 || (cfun->machine->return_used_this_function != 0)
15954 || offsets->saved_regs == offsets->outgoing_args
15955 || frame_pointer_needed);
15957 /* Reset the ARM-specific per-function variables. */
15958 after_arm_reorg = 0;
15962 /* Generate and emit an insn that we will recognize as a push_multi.
15963 Unfortunately, since this insn does not reflect very well the actual
15964 semantics of the operation, we need to annotate the insn for the benefit
15965 of DWARF2 frame unwind information. */
15967 emit_multi_reg_push (unsigned long mask)
15970 int num_dwarf_regs;
15974 int dwarf_par_index;
15977 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15978 if (mask & (1 << i))
15981 gcc_assert (num_regs && num_regs <= 16);
15983 /* We don't record the PC in the dwarf frame information. */
15984 num_dwarf_regs = num_regs;
15985 if (mask & (1 << PC_REGNUM))
15986 num_dwarf_regs--;
15988 /* For the body of the insn we are going to generate an UNSPEC in
15989 parallel with several USEs. This allows the insn to be recognized
15990 by the push_multi pattern in the arm.md file.
15992 The body of the insn looks something like this:
15995 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15996 (const_int:SI <num>)))
15997 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16003 For the frame note however, we try to be more explicit and actually
16004 show each register being stored into the stack frame, plus a (single)
16005 decrement of the stack pointer. We do it this way in order to be
16006 friendly to the stack unwinding code, which only wants to see a single
16007 stack decrement per instruction. The RTL we generate for the note looks
16008 something like this:
16011 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16012 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16013 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16014 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16018 FIXME: In an ideal world the PRE_MODIFY would not exist and
16019 instead we'd have a parallel expression detailing all
16020 the stores to the various memory addresses so that debug
16021 information is more up-to-date. Remember however while writing
16022 this to take care of the constraints with the push instruction.
16024 Note also that this has to be taken care of for the VFP registers.
16026 For more see PR43399. */
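/* Worked example (illustration only): for mask = {r4, r5, lr} the
   note generated below is

     (sequence [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
		(set (mem:SI (reg:SI sp)) (reg:SI r4))
		(set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
		(set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))])  */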
16028 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16029 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16030 dwarf_par_index = 1;
16032 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16034 if (mask & (1 << i))
16036 reg = gen_rtx_REG (SImode, i);
16038 XVECEXP (par, 0, 0)
16039 = gen_rtx_SET (VOIDmode,
16042 gen_rtx_PRE_MODIFY (Pmode,
16045 (stack_pointer_rtx,
16048 gen_rtx_UNSPEC (BLKmode,
16049 gen_rtvec (1, reg),
16050 UNSPEC_PUSH_MULT));
16052 if (i != PC_REGNUM)
16054 tmp = gen_rtx_SET (VOIDmode,
16055 gen_frame_mem (SImode, stack_pointer_rtx),
16057 RTX_FRAME_RELATED_P (tmp) = 1;
16058 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16066 for (j = 1, i++; j < num_regs; i++)
16068 if (mask & (1 << i))
16070 reg = gen_rtx_REG (SImode, i);
16072 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16074 if (i != PC_REGNUM)
16077 = gen_rtx_SET (VOIDmode,
16080 plus_constant (stack_pointer_rtx,
16083 RTX_FRAME_RELATED_P (tmp) = 1;
16084 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16091 par = emit_insn (par);
16093 tmp = gen_rtx_SET (VOIDmode,
16094 stack_pointer_rtx,
16095 plus_constant (stack_pointer_rtx, -4 * num_regs));
16096 RTX_FRAME_RELATED_P (tmp) = 1;
16097 XVECEXP (dwarf, 0, 0) = tmp;
16099 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16104 /* Calculate the size of the return value that is passed in registers. */
16106 arm_size_return_regs (void)
16108 enum machine_mode mode;
16110 if (crtl->return_rtx != 0)
16111 mode = GET_MODE (crtl->return_rtx);
16113 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16115 return GET_MODE_SIZE (mode);
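/* For example: a function returning "double" under the soft-float ABI
   has a DFmode return rtx, so this returns 8 -- the value comes back
   in two core registers (r0 and r1).  */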
16119 emit_sfm (int base_reg, int count)
16126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16127 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16129 reg = gen_rtx_REG (XFmode, base_reg++);
16131 XVECEXP (par, 0, 0)
16132 = gen_rtx_SET (VOIDmode,
16135 gen_rtx_PRE_MODIFY (Pmode,
16138 (stack_pointer_rtx,
16141 gen_rtx_UNSPEC (BLKmode,
16142 gen_rtvec (1, reg),
16143 UNSPEC_PUSH_MULT));
16144 tmp = gen_rtx_SET (VOIDmode,
16145 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16146 RTX_FRAME_RELATED_P (tmp) = 1;
16147 XVECEXP (dwarf, 0, 1) = tmp;
16149 for (i = 1; i < count; i++)
16151 reg = gen_rtx_REG (XFmode, base_reg++);
16152 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16154 tmp = gen_rtx_SET (VOIDmode,
16155 gen_frame_mem (XFmode,
16156 plus_constant (stack_pointer_rtx,
16159 RTX_FRAME_RELATED_P (tmp) = 1;
16160 XVECEXP (dwarf, 0, i + 1) = tmp;
16163 tmp = gen_rtx_SET (VOIDmode,
16164 stack_pointer_rtx,
16165 plus_constant (stack_pointer_rtx, -12 * count));
16167 RTX_FRAME_RELATED_P (tmp) = 1;
16168 XVECEXP (dwarf, 0, 0) = tmp;
16170 par = emit_insn (par);
16171 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
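/* Usage sketch (illustrative): emit_sfm (FIRST_FPA_REGNUM + 4, 2)
   emits a single sfm that pushes two 12-byte FPA registers,
   pre-decrementing SP by 24, with both stores described in the
   attached REG_FRAME_RELATED_EXPR note.  */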
16177 /* Return true if the current function needs to save/restore LR. */
16180 thumb_force_lr_save (void)
16182 return !cfun->machine->lr_save_eliminated
16183 && (!leaf_function_p ()
16184 || thumb_far_jump_used_p ()
16185 || df_regs_ever_live_p (LR_REGNUM));
16189 /* Return true if r3 is used by any of the tail call insns in the
16190 current function. */
16193 any_sibcall_uses_r3 (void)
16198 if (!crtl->tail_call_emit)
16199 return false;
16200 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16201 if (e->flags & EDGE_SIBCALL)
16203 rtx call = BB_END (e->src);
16204 if (!CALL_P (call))
16205 call = prev_nonnote_nondebug_insn (call);
16206 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16207 if (find_regno_fusage (call, USE, 3))
16208 return true;
16211 return false;
16214 /* Compute the distance from register FROM to register TO.
16215 These can be the arg pointer (26), the soft frame pointer (25),
16216 the stack pointer (13) or the hard frame pointer (11).
16217 In thumb mode r7 is used as the soft frame pointer, if needed.
16218 Typical stack layout looks like this:
16220 old stack pointer -> |    |
16221                       ----
16222                      |    | \
16223                      |    |   saved arguments for
16224                      |    |   vararg functions
16225                      |    | /
16226                       --
16227 hard FP & arg pointer -> |    | \
16228                      |    |   stack
16229                      |    |   frame
16230                      |    | /
16231                       --
16232                      |    | \
16233                      |    |   call saved
16234                      |    |   registers
16235 soft frame pointer -> |    | /
16236                       --
16237                      |    | \
16238                      |    |   local
16239                      |    |   variables
16240 locals base pointer -> |    | /
16241                       --
16242                      |    | \
16243                      |    |   outgoing
16244                      |    |   arguments
16245 current stack pointer -> |    | /
16246                       --
16248 For a given function some or all of these stack components
16249 may not be needed, giving rise to the possibility of
16250 eliminating some of the registers.
16252 The values returned by this function must reflect the behavior
16253 of arm_expand_prologue() and arm_compute_save_reg_mask().
16255 The sign of the number returned reflects the direction of stack
16256 growth, so the values are positive for all eliminations except
16257 from the soft frame pointer to the hard frame pointer.
16259 SFP may point just inside the local variables block to ensure correct
16260 alignment. */
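/* Worked example (not from the original comment, assuming no
   caller-interworking slot and no static chain): with no pretend args,
   {r4, r5, r6, lr} saved (16 bytes), 8 bytes of locals and no outgoing
   arguments, arm_get_frame_offsets gives saved_regs = 16,
   soft_frame = 16, locals_base = 24 and outgoing_args = 24, so
   ARG_POINTER -> STACK_POINTER eliminates to 24 - (0 + 4) = 20 and
   FRAME_POINTER -> STACK_POINTER to 24 - 16 = 8.  */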
16263 /* Calculate stack offsets. These are used to calculate register elimination
16264 offsets and in prologue/epilogue code. Also calculates which registers
16265 should be saved. */
16267 static arm_stack_offsets *
16268 arm_get_frame_offsets (void)
16270 struct arm_stack_offsets *offsets;
16271 unsigned long func_type;
16275 HOST_WIDE_INT frame_size;
16278 offsets = &cfun->machine->stack_offsets;
16280 /* We need to know if we are a leaf function. Unfortunately, it
16281 is possible to be called after start_sequence has been called,
16282 which causes get_insns to return the insns for the sequence,
16283 not the function, which will cause leaf_function_p to return
16284 the incorrect result.
16285 We only need
16286 to know about leaf functions once reload has completed, and the
16287 frame size cannot be changed after that time, so we can safely
16288 use the cached value. */
16290 if (reload_completed)
16293 /* Initially this is the size of the local variables. It will be translated
16294 into an offset once we have determined the size of preceding data. */
16295 frame_size = ROUND_UP_WORD (get_frame_size ());
16297 leaf = leaf_function_p ();
16299 /* Space for variadic functions. */
16300 offsets->saved_args = crtl->args.pretend_args_size;
16302 /* In Thumb mode this is incorrect, but never used. */
16303 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16304 arm_compute_static_chain_stack_bytes();
16308 unsigned int regno;
16310 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16311 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16312 saved = core_saved;
16314 /* We know that SP will be doubleword aligned on entry, and we must
16315 preserve that condition at any subroutine call. We also require the
16316 soft frame pointer to be doubleword aligned. */
16318 if (TARGET_REALLY_IWMMXT)
16320 /* Check for the call-saved iWMMXt registers. */
16321 for (regno = FIRST_IWMMXT_REGNUM;
16322 regno <= LAST_IWMMXT_REGNUM;
16324 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16328 func_type = arm_current_func_type ();
16329 if (! IS_VOLATILE (func_type))
16331 /* Space for saved FPA registers. */
16332 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16333 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16336 /* Space for saved VFP registers. */
16337 if (TARGET_HARD_FLOAT && TARGET_VFP)
16338 saved += arm_get_vfp_saved_size ();
16341 else /* TARGET_THUMB1 */
16343 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16344 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16345 saved = core_saved;
16346 if (TARGET_BACKTRACE)
16350 /* Saved registers include the stack frame. */
16351 offsets->saved_regs = offsets->saved_args + saved +
16352 arm_compute_static_chain_stack_bytes();
16353 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16354 /* A leaf function does not need any stack alignment if it has nothing
16355 on the stack. */
16356 if (leaf && frame_size == 0
16357 /* However if it calls alloca(), we have a dynamically allocated
16358 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16359 && ! cfun->calls_alloca)
16361 offsets->outgoing_args = offsets->soft_frame;
16362 offsets->locals_base = offsets->soft_frame;
16366 /* Ensure SFP has the correct alignment. */
16367 if (ARM_DOUBLEWORD_ALIGN
16368 && (offsets->soft_frame & 7))
16370 offsets->soft_frame += 4;
16371 /* Try to align stack by pushing an extra reg. Don't bother doing this
16372 when there is a stack frame as the alignment will be rolled into
16373 the normal stack adjustment. */
16374 if (frame_size + crtl->outgoing_args_size == 0)
16378 /* If it is safe to use r3, then do so. This sometimes
16379 generates better code on Thumb-2 by avoiding the need to
16380 use 32-bit push/pop instructions. */
16381 if (! any_sibcall_uses_r3 ()
16382 && arm_size_return_regs () <= 12
16383 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16388 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16390 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16399 offsets->saved_regs += 4;
16400 offsets->saved_regs_mask |= (1 << reg);
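/* E.g. (illustration): a function saving {r4, r5, lr} has a 12-byte
   save area, leaving soft_frame misaligned; pushing r3 as a scratch
   register rounds the save area up to 16 bytes without a separate
   "sub sp, sp, #4".  */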
16405 offsets->locals_base = offsets->soft_frame + frame_size;
16406 offsets->outgoing_args = (offsets->locals_base
16407 + crtl->outgoing_args_size);
16409 if (ARM_DOUBLEWORD_ALIGN)
16411 /* Ensure SP remains doubleword aligned. */
16412 if (offsets->outgoing_args & 7)
16413 offsets->outgoing_args += 4;
16414 gcc_assert (!(offsets->outgoing_args & 7));
16421 /* Calculate the relative offsets for the different stack pointers. Positive
16422 offsets are in the direction of stack growth. */
16425 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16427 arm_stack_offsets *offsets;
16429 offsets = arm_get_frame_offsets ();
16431 /* OK, now we have enough information to compute the distances.
16432 There must be an entry in these switch tables for each pair
16433 of registers in ELIMINABLE_REGS, even if some of the entries
16434 seem to be redundant or useless. */
16437 case ARG_POINTER_REGNUM:
16440 case THUMB_HARD_FRAME_POINTER_REGNUM:
16443 case FRAME_POINTER_REGNUM:
16444 /* This is the reverse of the soft frame pointer
16445 to hard frame pointer elimination below. */
16446 return offsets->soft_frame - offsets->saved_args;
16448 case ARM_HARD_FRAME_POINTER_REGNUM:
16449 /* This is only non-zero in the case where the static chain register
16450 is stored above the frame. */
16451 return offsets->frame - offsets->saved_args - 4;
16453 case STACK_POINTER_REGNUM:
16454 /* If nothing has been pushed on the stack at all
16455 then this will return -4. This *is* correct! */
16456 return offsets->outgoing_args - (offsets->saved_args + 4);
16459 gcc_unreachable ();
16461 gcc_unreachable ();
16463 case FRAME_POINTER_REGNUM:
16466 case THUMB_HARD_FRAME_POINTER_REGNUM:
16469 case ARM_HARD_FRAME_POINTER_REGNUM:
16470 /* The hard frame pointer points to the top entry in the
16471 stack frame. The soft frame pointer to the bottom entry
16472 in the stack frame. If there is no stack frame at all,
16473 then they are identical. */
16475 return offsets->frame - offsets->soft_frame;
16477 case STACK_POINTER_REGNUM:
16478 return offsets->outgoing_args - offsets->soft_frame;
16481 gcc_unreachable ();
16483 gcc_unreachable ();
16486 /* You cannot eliminate from the stack pointer.
16487 In theory you could eliminate from the hard frame
16488 pointer to the stack pointer, but this will never
16489 happen, since if a stack frame is not needed the
16490 hard frame pointer will never be used. */
16491 gcc_unreachable ();
16495 /* Given FROM and TO register numbers, say whether this elimination is
16496 allowed. Frame pointer elimination is automatically handled.
16498 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16499 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16500 pointer, we must eliminate FRAME_POINTER_REGNUM into
16501 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16502 ARG_POINTER_REGNUM. */
16505 arm_can_eliminate (const int from, const int to)
16507 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16508 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16509 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16510 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16514 /* Emit RTL to save coprocessor registers on function entry. Returns the
16515 number of bytes pushed. */
16518 arm_save_coproc_regs(void)
16520 int saved_size = 0;
16522 unsigned start_reg;
16525 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16526 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16528 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16529 insn = gen_rtx_MEM (V2SImode, insn);
16530 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16531 RTX_FRAME_RELATED_P (insn) = 1;
16535 /* Save any floating point call-saved registers used by this
16537 if (TARGET_FPA_EMU2)
16539 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16540 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16542 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16543 insn = gen_rtx_MEM (XFmode, insn);
16544 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16545 RTX_FRAME_RELATED_P (insn) = 1;
16551 start_reg = LAST_FPA_REGNUM;
16553 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16555 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16557 if (start_reg - reg == 3)
16559 insn = emit_sfm (reg, 4);
16560 RTX_FRAME_RELATED_P (insn) = 1;
16562 start_reg = reg - 1;
16567 if (start_reg != reg)
16569 insn = emit_sfm (reg + 1, start_reg - reg);
16570 RTX_FRAME_RELATED_P (insn) = 1;
16571 saved_size += (start_reg - reg) * 12;
16573 start_reg = reg - 1;
16577 if (start_reg != reg)
16579 insn = emit_sfm (reg + 1, start_reg - reg);
16580 saved_size += (start_reg - reg) * 12;
16581 RTX_FRAME_RELATED_P (insn) = 1;
16584 if (TARGET_HARD_FLOAT && TARGET_VFP)
16586 start_reg = FIRST_VFP_REGNUM;
16588 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16590 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16591 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16593 if (start_reg != reg)
16594 saved_size += vfp_emit_fstmd (start_reg,
16595 (reg - start_reg) / 2);
16596 start_reg = reg + 2;
16599 if (start_reg != reg)
16600 saved_size += vfp_emit_fstmd (start_reg,
16601 (reg - start_reg) / 2);
16607 /* Set the Thumb frame pointer from the stack pointer. */
16610 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16612 HOST_WIDE_INT amount;
16615 amount = offsets->outgoing_args - offsets->locals_base;
16617 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16618 stack_pointer_rtx, GEN_INT (amount)));
16621 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16622 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16623 expects the first two operands to be the same. */
16626 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16628 hard_frame_pointer_rtx));
16632 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16633 hard_frame_pointer_rtx,
16634 stack_pointer_rtx));
16636 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16637 plus_constant (stack_pointer_rtx, amount));
16638 RTX_FRAME_RELATED_P (dwarf) = 1;
16639 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16642 RTX_FRAME_RELATED_P (insn) = 1;
16645 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16648 arm_expand_prologue (void)
16653 unsigned long live_regs_mask;
16654 unsigned long func_type;
16656 int saved_pretend_args = 0;
16657 int saved_regs = 0;
16658 unsigned HOST_WIDE_INT args_to_push;
16659 arm_stack_offsets *offsets;
16661 func_type = arm_current_func_type ();
16663 /* Naked functions don't have prologues. */
16664 if (IS_NAKED (func_type))
16667 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
16668 args_to_push = crtl->args.pretend_args_size;
16670 /* Compute which register we will have to save onto the stack. */
16671 offsets = arm_get_frame_offsets ();
16672 live_regs_mask = offsets->saved_regs_mask;
16674 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16676 if (IS_STACKALIGN (func_type))
16680 /* Handle a word-aligned stack pointer. We generate the following:
16682 mov r0, sp
16683 bic r1, r0, #7
16684 mov sp, r1
16685 <save and restore r0 in normal prologue/epilogue>
16686 mov sp, r0
16687 bx lr
16689 The unwinder doesn't need to know about the stack realignment.
16690 Just tell it we saved SP in r0. */
16691 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16693 r0 = gen_rtx_REG (SImode, 0);
16694 r1 = gen_rtx_REG (SImode, 1);
16696 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16697 RTX_FRAME_RELATED_P (insn) = 1;
16698 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16700 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16702 /* ??? The CFA changes here, which may cause GDB to conclude that it
16703 has entered a different function. That said, the unwind info is
16704 correct, individually, before and after this instruction because
16705 we've described the save of SP, which will override the default
16706 handling of SP as restoring from the CFA. */
16707 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16710 /* For APCS frames, if IP register is clobbered
16711 when creating frame, save that register in a special
16712 way. */
16713 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16715 if (IS_INTERRUPT (func_type))
16717 /* Interrupt functions must not corrupt any registers.
16718 Creating a frame pointer however, corrupts the IP
16719 register, so we must push it first. */
16720 emit_multi_reg_push (1 << IP_REGNUM);
16722 /* Do not set RTX_FRAME_RELATED_P on this insn.
16723 The dwarf stack unwinding code only wants to see one
16724 stack decrement per function, and this is not it. If
16725 this instruction is labeled as being part of the frame
16726 creation sequence then dwarf2out_frame_debug_expr will
16727 die when it encounters the assignment of IP to FP
16728 later on, since the use of SP here establishes SP as
16729 the CFA register and not IP.
16731 Anyway this instruction is not really part of the stack
16732 frame creation although it is part of the prologue. */
16734 else if (IS_NESTED (func_type))
16736 /* The Static chain register is the same as the IP register
16737 used as a scratch register during stack frame creation.
16738 To get around this need to find somewhere to store IP
16739 whilst the frame is being created. We try the following
16742 1. The last argument register.
16743 2. A slot on the stack above the frame. (This only
16744 works if the function is not a varargs function).
16745 3. Register r3, after pushing the argument registers
16746 onto the stack.
16748 Note - we only need to tell the dwarf2 backend about the SP
16749 adjustment in the second variant; the static chain register
16750 doesn't need to be unwound, as it doesn't contain a value
16751 inherited from the caller. */
16753 if (df_regs_ever_live_p (3) == false)
16754 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16755 else if (args_to_push == 0)
16759 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16762 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16763 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16766 /* Just tell the dwarf backend that we adjusted SP. */
16767 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16768 plus_constant (stack_pointer_rtx,
16770 RTX_FRAME_RELATED_P (insn) = 1;
16771 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16775 /* Store the args on the stack. */
16776 if (cfun->machine->uses_anonymous_args)
16777 insn = emit_multi_reg_push
16778 ((0xf0 >> (args_to_push / 4)) & 0xf);
16781 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16782 GEN_INT (- args_to_push)));
16784 RTX_FRAME_RELATED_P (insn) = 1;
16786 saved_pretend_args = 1;
16787 fp_offset = args_to_push;
16790 /* Now reuse r3 to preserve IP. */
16791 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16795 insn = emit_set_insn (ip_rtx,
16796 plus_constant (stack_pointer_rtx, fp_offset));
16797 RTX_FRAME_RELATED_P (insn) = 1;
16802 /* Push the argument registers, or reserve space for them. */
16803 if (cfun->machine->uses_anonymous_args)
16804 insn = emit_multi_reg_push
16805 ((0xf0 >> (args_to_push / 4)) & 0xf);
16808 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16809 GEN_INT (- args_to_push)));
16810 RTX_FRAME_RELATED_P (insn) = 1;
16813 /* If this is an interrupt service routine, and the link register
16814 is going to be pushed, and we're not generating extra
16815 push of IP (needed when a frame is needed and the frame layout is APCS),
16816 subtracting four from LR now will mean that the function return
16817 can be done with a single instruction. */
16818 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16819 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16820 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16823 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16825 emit_set_insn (lr, plus_constant (lr, -4));
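/* Illustration (not in the original): because the value pushed for LR
   is now the real return address, an IRQ handler can return with a
   single "ldmfd sp!, {..., pc}^" instead of popping LR and then
   executing "subs pc, lr, #4".  */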
16828 if (live_regs_mask)
16830 saved_regs += bit_count (live_regs_mask) * 4;
16831 if (optimize_size && !frame_pointer_needed
16832 && saved_regs == offsets->saved_regs - offsets->saved_args)
16834 /* If no coprocessor registers are being pushed and we don't have
16835 to worry about a frame pointer then push extra registers to
16836 create the stack frame. This is done in a way that does not
16837 alter the frame layout, so is independent of the epilogue. */
16841 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16843 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16844 if (frame && n * 4 >= frame)
16847 live_regs_mask |= (1 << n) - 1;
16848 saved_regs += frame;
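/* Illustration: when optimizing for size, a function with a 12-byte
   local frame and no live registers below r4 pushes r0-r2 as padding,
   so a single stmfd both saves the registers and allocates the
   frame.  */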
16851 insn = emit_multi_reg_push (live_regs_mask);
16852 RTX_FRAME_RELATED_P (insn) = 1;
16855 if (! IS_VOLATILE (func_type))
16856 saved_regs += arm_save_coproc_regs ();
16858 if (frame_pointer_needed && TARGET_ARM)
16860 /* Create the new frame pointer. */
16861 if (TARGET_APCS_FRAME)
16863 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16864 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16865 RTX_FRAME_RELATED_P (insn) = 1;
16867 if (IS_NESTED (func_type))
16869 /* Recover the static chain register. */
16870 if (!df_regs_ever_live_p (3)
16871 || saved_pretend_args)
16872 insn = gen_rtx_REG (SImode, 3);
16873 else /* if (crtl->args.pretend_args_size == 0) */
16875 insn = plus_constant (hard_frame_pointer_rtx, 4);
16876 insn = gen_frame_mem (SImode, insn);
16878 emit_set_insn (ip_rtx, insn);
16879 /* Add a USE to stop propagate_one_insn() from barfing. */
16880 emit_insn (gen_prologue_use (ip_rtx));
16885 insn = GEN_INT (saved_regs - 4);
16886 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16887 stack_pointer_rtx, insn));
16888 RTX_FRAME_RELATED_P (insn) = 1;
16892 if (flag_stack_usage_info)
16893 current_function_static_stack_size
16894 = offsets->outgoing_args - offsets->saved_args;
16896 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16898 /* This add can produce multiple insns for a large constant, so we
16899 need to get tricky. */
16900 rtx last = get_last_insn ();
16902 amount = GEN_INT (offsets->saved_args + saved_regs
16903 - offsets->outgoing_args);
16905 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16906 amount));
16907 do
16909 last = last ? NEXT_INSN (last) : get_insns ();
16910 RTX_FRAME_RELATED_P (last) = 1;
16912 while (last != insn);
16914 /* If the frame pointer is needed, emit a special barrier that
16915 will prevent the scheduler from moving stores to the frame
16916 before the stack adjustment. */
16917 if (frame_pointer_needed)
16918 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16919 hard_frame_pointer_rtx));
16923 if (frame_pointer_needed && TARGET_THUMB2)
16924 thumb_set_frame_pointer (offsets);
16926 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16928 unsigned long mask;
16930 mask = live_regs_mask;
16931 mask &= THUMB2_WORK_REGS;
16932 if (!IS_NESTED (func_type))
16933 mask |= (1 << IP_REGNUM);
16934 arm_load_pic_register (mask);
16937 /* If we are profiling, make sure no instructions are scheduled before
16938 the call to mcount. Similarly if the user has requested no
16939 scheduling in the prolog. Similarly if we want non-call exceptions
16940 using the EABI unwinder, to prevent faulting instructions from being
16941 swapped with a stack adjustment. */
16942 if (crtl->profile || !TARGET_SCHED_PROLOG
16943 || (arm_except_unwind_info (&global_options) == UI_TARGET
16944 && cfun->can_throw_non_call_exceptions))
16945 emit_insn (gen_blockage ());
16947 /* If the link register is being kept alive, with the return address in it,
16948 then make sure that it does not get reused by the ce2 pass. */
16949 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16950 cfun->machine->lr_save_eliminated = 1;
16953 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16955 arm_print_condition (FILE *stream)
16957 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16959 /* Branch conversion is not implemented for Thumb-2. */
16962 output_operand_lossage ("predicated Thumb instruction");
16965 if (current_insn_predicate != NULL)
16967 output_operand_lossage
16968 ("predicated instruction in conditional sequence");
16972 fputs (arm_condition_codes[arm_current_cc], stream);
16974 else if (current_insn_predicate)
16976 enum arm_cond_code code;
16980 output_operand_lossage ("predicated Thumb instruction");
16984 code = get_arm_condition_code (current_insn_predicate);
16985 fputs (arm_condition_codes[code], stream);
16990 /* If CODE is 'd', then the X is a condition operand and the instruction
16991 should only be executed if the condition is true.
16992 if CODE is 'D', then the X is a condition operand and the instruction
16993 should only be executed if the condition is false: however, if the mode
16994 of the comparison is CCFPEmode, then always execute the instruction -- we
16995 do this because in these circumstances !GE does not necessarily imply LT;
16996 in these cases the instruction pattern will take care to make sure that
16997 an instruction containing %d will follow, thereby undoing the effects of
16998 doing this instruction unconditionally.
16999 If CODE is 'N' then X is a floating point operand that must be negated
17001 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17002 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
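/* A couple of examples (illustrative): "%B" applied to (const_int 0)
   prints -1 (the bitwise inverse), and "%M" applied to r4 in DImode
   prints "{r4-r5}".  */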
17004 arm_print_operand (FILE *stream, rtx x, int code)
17009 fputs (ASM_COMMENT_START, stream);
17013 fputs (user_label_prefix, stream);
17017 fputs (REGISTER_PREFIX, stream);
17021 arm_print_condition (stream);
17025 /* Nothing in unified syntax, otherwise the current condition code. */
17026 if (!TARGET_UNIFIED_ASM)
17027 arm_print_condition (stream);
17031 /* The current condition code in unified syntax, otherwise nothing. */
17032 if (TARGET_UNIFIED_ASM)
17033 arm_print_condition (stream);
17037 /* The current condition code for a condition code setting instruction.
17038 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17039 if (TARGET_UNIFIED_ASM)
17041 fputc('s', stream);
17042 arm_print_condition (stream);
17046 arm_print_condition (stream);
17047 fputc('s', stream);
17052 /* If the instruction is conditionally executed then print
17053 the current condition code, otherwise print 's'. */
17054 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17055 if (current_insn_predicate)
17056 arm_print_condition (stream);
17058 fputc('s', stream);
17061 /* %# is a "break" sequence. It doesn't output anything, but is used to
17062 separate e.g. operand numbers from following text, if that text consists
17063 of further digits which we don't want to be part of the operand
17064 number. */
17071 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17072 r = real_value_negate (&r);
17073 fprintf (stream, "%s", fp_const_from_val (&r));
17077 /* An integer or symbol address without a preceding # sign. */
17079 switch (GET_CODE (x))
17082 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17086 output_addr_const (stream, x);
17090 if (GET_CODE (XEXP (x, 0)) == PLUS
17091 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17093 output_addr_const (stream, x);
17096 /* Fall through. */
17099 output_operand_lossage ("Unsupported operand for code '%c'", code);
17104 if (GET_CODE (x) == CONST_INT)
17107 val = ARM_SIGN_EXTEND (~INTVAL (x));
17108 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17112 putc ('~', stream);
17113 output_addr_const (stream, x);
17118 /* The low 16 bits of an immediate constant. */
17119 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17123 fprintf (stream, "%s", arithmetic_instr (x, 1));
17126 /* Truncate Cirrus shift counts. */
17128 if (GET_CODE (x) == CONST_INT)
17130 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17133 arm_print_operand (stream, x, 0);
17137 fprintf (stream, "%s", arithmetic_instr (x, 0));
17145 if (!shift_operator (x, SImode))
17147 output_operand_lossage ("invalid shift operand");
17151 shift = shift_op (x, &val);
17155 fprintf (stream, ", %s ", shift);
17157 arm_print_operand (stream, XEXP (x, 1), 0);
17159 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17164 /* An explanation of the 'Q', 'R' and 'H' register operands:
17166 In a pair of registers containing a DI or DF value the 'Q'
17167 operand returns the register number of the register containing
17168 the least significant part of the value. The 'R' operand returns
17169 the register number of the register containing the most
17170 significant part of the value.
17172 The 'H' operand returns the higher of the two register numbers.
17173 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17174 same as the 'Q' operand, since the most significant part of the
17175 value is held in the lower number register. The reverse is true
17176 on systems where WORDS_BIG_ENDIAN is false.
17178 The purpose of these operands is to distinguish between cases
17179 where the endian-ness of the values is important (for example
17180 when they are added together), and cases where the endian-ness
17181 is irrelevant, but the order of register operations is important.
17182 For example when loading a value from memory into a register
17183 pair, the endian-ness does not matter. Provided that the value
17184 from the lower memory address is put into the lower numbered
17185 register, and the value from the higher address is put into the
17186 higher numbered register, the load will work regardless of whether
17187 the value being loaded is big-wordian or little-wordian. The
17188 order of the two register loads can matter however, if the address
17189 of the memory location is actually held in one of the registers
17190 being overwritten by the load.
17192 The 'Q' and 'R' constraints are also available for 64-bit
17193 constants. */
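/* Example (illustrative): for a DImode value held in {r0, r1} on a
   little-endian target, %Q prints r0 (least significant half), %R
   prints r1 (most significant half), and %H prints r1 -- always the
   higher-numbered register, whatever the endianness.  */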
17195 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17197 rtx part = gen_lowpart (SImode, x);
17198 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17202 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17204 output_operand_lossage ("invalid operand for code '%c'", code);
17208 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17212 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17214 enum machine_mode mode = GET_MODE (x);
17217 if (mode == VOIDmode)
17219 part = gen_highpart_mode (SImode, mode, x);
17220 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17224 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17226 output_operand_lossage ("invalid operand for code '%c'", code);
17230 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17234 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17236 output_operand_lossage ("invalid operand for code '%c'", code);
17240 asm_fprintf (stream, "%r", REGNO (x) + 1);
17244 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17246 output_operand_lossage ("invalid operand for code '%c'", code);
17250 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17254 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17256 output_operand_lossage ("invalid operand for code '%c'", code);
17260 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17264 asm_fprintf (stream, "%r",
17265 GET_CODE (XEXP (x, 0)) == REG
17266 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17270 asm_fprintf (stream, "{%r-%r}",
17271 REGNO (x),
17272 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17275 /* Like 'M', but writing doubleword vector registers, for use by Neon
17279 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17280 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17282 asm_fprintf (stream, "{d%d}", regno);
17284 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17289 /* CONST_TRUE_RTX means always -- that's the default. */
17290 if (x == const_true_rtx)
17293 if (!COMPARISON_P (x))
17295 output_operand_lossage ("invalid operand for code '%c'", code);
17299 fputs (arm_condition_codes[get_arm_condition_code (x)],
17304 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17305 want to do that. */
17306 if (x == const_true_rtx)
17308 output_operand_lossage ("instruction never executed");
17311 if (!COMPARISON_P (x))
17313 output_operand_lossage ("invalid operand for code '%c'", code);
17317 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17318 (get_arm_condition_code (x))],
17322 /* Cirrus registers can be accessed in a variety of ways:
17323 single floating point (f)
17324 double floating point (d)
17326 64bit integer (dx). */
17327 case 'W': /* Cirrus register in F mode. */
17328 case 'X': /* Cirrus register in D mode. */
17329 case 'Y': /* Cirrus register in FX mode. */
17330 case 'Z': /* Cirrus register in DX mode. */
17331 gcc_assert (GET_CODE (x) == REG
17332 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17334 fprintf (stream, "mv%s%s",
17335 code == 'W' ? "f"
17336 : code == 'X' ? "d"
17337 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17341 /* Print cirrus register in the mode specified by the register's mode. */
17344 int mode = GET_MODE (x);
17346 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17348 output_operand_lossage ("invalid operand for code '%c'", code);
17352 fprintf (stream, "mv%s%s",
17353 mode == DFmode ? "d"
17354 : mode == SImode ? "fx"
17355 : mode == DImode ? "dx"
17356 : "f", reg_names[REGNO (x)] + 2);
17362 if (GET_CODE (x) != REG
17363 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17364 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17365 /* Bad value for wCG register number. */
17367 output_operand_lossage ("invalid operand for code '%c'", code);
17372 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17375 /* Print an iWMMXt control register name. */
17377 if (GET_CODE (x) != CONST_INT
17378 || INTVAL (x) < 0
17379 || INTVAL (x) >= 16)
17380 /* Bad value for wC register number. */
17382 output_operand_lossage ("invalid operand for code '%c'", code);
17388 static const char * wc_reg_names [16] =
17390 "wCID", "wCon", "wCSSF", "wCASF",
17391 "wC4", "wC5", "wC6", "wC7",
17392 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17393 "wC12", "wC13", "wC14", "wC15"
17396 fputs (wc_reg_names [INTVAL (x)], stream);
17400 /* Print the high single-precision register of a VFP double-precision
17404 int mode = GET_MODE (x);
17407 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17409 output_operand_lossage ("invalid operand for code '%c'", code);
17414 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17416 output_operand_lossage ("invalid operand for code '%c'", code);
17420 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17424 /* Print a VFP/Neon double precision or quad precision register name. */
17428 int mode = GET_MODE (x);
17429 int is_quad = (code == 'q');
17432 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17434 output_operand_lossage ("invalid operand for code '%c'", code);
17438 if (GET_CODE (x) != REG
17439 || !IS_VFP_REGNUM (REGNO (x)))
17441 output_operand_lossage ("invalid operand for code '%c'", code);
17446 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17447 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17449 output_operand_lossage ("invalid operand for code '%c'", code);
17453 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17454 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17458 /* These two codes print the low/high doubleword register of a Neon quad
17459 register, respectively. For pair-structure types, can also print
17460 low/high quadword registers. */
17464 int mode = GET_MODE (x);
17467 if ((GET_MODE_SIZE (mode) != 16
17468 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17470 output_operand_lossage ("invalid operand for code '%c'", code);
17475 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17477 output_operand_lossage ("invalid operand for code '%c'", code);
17481 if (GET_MODE_SIZE (mode) == 16)
17482 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17483 + (code == 'f' ? 1 : 0));
17485 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17486 + (code == 'f' ? 1 : 0));
17490 /* Print a VFPv3 floating-point constant, represented as an integer
17494 int index = vfp3_const_double_index (x);
17495 gcc_assert (index != -1);
17496 fprintf (stream, "%d", index);
17500 /* Print bits representing opcode features for Neon.
17502 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17503 and polynomials as unsigned.
17505 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17507 Bit 2 is 1 for rounding functions, 0 otherwise. */
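/* Example (illustrative): bits = 3 prints 'f' (floats count as
   signed), bits = 2 prints 'p' (polynomials count as unsigned), and
   the rounding code prints an "r" suffix when bit 2 is set.  */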
17509 /* Identify the type as 's', 'u', 'p' or 'f'. */
17512 HOST_WIDE_INT bits = INTVAL (x);
17513 fputc ("uspf"[bits & 3], stream);
17517 /* Likewise, but signed and unsigned integers are both 'i'. */
17520 HOST_WIDE_INT bits = INTVAL (x);
17521 fputc ("iipf"[bits & 3], stream);
17525 /* As for 'T', but emit 'u' instead of 'p'. */
17528 HOST_WIDE_INT bits = INTVAL (x);
17529 fputc ("usuf"[bits & 3], stream);
17533 /* Bit 2: rounding (vs none). */
17536 HOST_WIDE_INT bits = INTVAL (x);
17537 fputs ((bits & 4) != 0 ? "r" : "", stream);
17541 /* Memory operand for vld1/vst1 instruction. */
17545 bool postinc = FALSE;
17546 unsigned align, memsize, align_bits;
17548 gcc_assert (GET_CODE (x) == MEM);
17549 addr = XEXP (x, 0);
17550 if (GET_CODE (addr) == POST_INC)
17553 addr = XEXP (addr, 0);
17555 asm_fprintf (stream, "[%r", REGNO (addr));
17557 /* We know the alignment of this access, so we can emit a hint in the
17558 instruction (for some alignments) as an aid to the memory subsystem
17559 of the CPU. */
17560 align = MEM_ALIGN (x) >> 3;
17561 memsize = MEM_SIZE (x);
17563 /* Only certain alignment specifiers are supported by the hardware. */
17564 if (memsize == 16 && (align % 32) == 0)
17565 align_bits = 256;
17566 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
17567 align_bits = 128;
17568 else if ((align % 8) == 0)
17569 align_bits = 64;
17570 else
17571 align_bits = 0;
17573 if (align_bits != 0)
17574 asm_fprintf (stream, ":%d", align_bits);
17576 asm_fprintf (stream, "]");
17579 fputs("!", stream);
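/* E.g. (illustration): a 16-byte access through r0 known to be
   32-byte aligned prints as "[r0:256]"; with no usable alignment it
   is just "[r0]", plus a trailing "!" when the address post-increments
   by the access size.  */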
17587 gcc_assert (GET_CODE (x) == MEM);
17588 addr = XEXP (x, 0);
17589 gcc_assert (GET_CODE (addr) == REG);
17590 asm_fprintf (stream, "[%r]", REGNO (addr));
17594 /* Translate an S register number into a D register number and element index. */
17597 int mode = GET_MODE (x);
17600 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17602 output_operand_lossage ("invalid operand for code '%c'", code);
17607 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17609 output_operand_lossage ("invalid operand for code '%c'", code);
17613 regno = regno - FIRST_VFP_REGNUM;
17614 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
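/* E.g. (illustration): s3 prints as "d1[1]" -- the odd-numbered
   single-precision register is lane 1 of the overlapping
   double-precision register.  */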
17618 /* Register specifier for vld1.16/vst1.16. Translate the S register
17619 number into a D register number and element index. */
17622 int mode = GET_MODE (x);
17625 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17627 output_operand_lossage ("invalid operand for code '%c'", code);
17632 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17634 output_operand_lossage ("invalid operand for code '%c'", code);
17638 regno = regno - FIRST_VFP_REGNUM;
17639 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17646 output_operand_lossage ("missing operand");
17650 switch (GET_CODE (x))
17653 asm_fprintf (stream, "%r", REGNO (x));
17657 output_memory_reference_mode = GET_MODE (x);
17658 output_address (XEXP (x, 0));
17665 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17666 sizeof (fpstr), 0, 1);
17667 fprintf (stream, "#%s", fpstr);
17670 fprintf (stream, "#%s", fp_immediate_constant (x));
17674 gcc_assert (GET_CODE (x) != NEG);
17675 fputc ('#', stream);
17676 if (GET_CODE (x) == HIGH)
17678 fputs (":lower16:", stream);
17682 output_addr_const (stream, x);
17688 /* Target hook for printing a memory address. */
17690 arm_print_operand_address (FILE *stream, rtx x)
17694 int is_minus = GET_CODE (x) == MINUS;
17696 if (GET_CODE (x) == REG)
17697 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17698 else if (GET_CODE (x) == PLUS || is_minus)
17700 rtx base = XEXP (x, 0);
17701 rtx index = XEXP (x, 1);
17702 HOST_WIDE_INT offset = 0;
17703 if (GET_CODE (base) != REG
17704 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17706 /* Ensure that BASE is a register. */
17707 /* (one of them must be). */
17708 /* Also ensure the SP is not used as an index register. */
17713 switch (GET_CODE (index))
17716 offset = INTVAL (index);
17719 asm_fprintf (stream, "[%r, #%wd]",
17720 REGNO (base), offset);
17724 asm_fprintf (stream, "[%r, %s%r]",
17725 REGNO (base), is_minus ? "-" : "",
17735 asm_fprintf (stream, "[%r, %s%r",
17736 REGNO (base), is_minus ? "-" : "",
17737 REGNO (XEXP (index, 0)));
17738 arm_print_operand (stream, index, 'S');
17739 fputs ("]", stream);
17744 gcc_unreachable ();
17747 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17748 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17750 extern enum machine_mode output_memory_reference_mode;
17752 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17754 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17755 asm_fprintf (stream, "[%r, #%s%d]!",
17756 REGNO (XEXP (x, 0)),
17757 GET_CODE (x) == PRE_DEC ? "-" : "",
17758 GET_MODE_SIZE (output_memory_reference_mode));
17760 asm_fprintf (stream, "[%r], #%s%d",
17761 REGNO (XEXP (x, 0)),
17762 GET_CODE (x) == POST_DEC ? "-" : "",
17763 GET_MODE_SIZE (output_memory_reference_mode));
17765 else if (GET_CODE (x) == PRE_MODIFY)
17767 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17768 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17769 asm_fprintf (stream, "#%wd]!",
17770 INTVAL (XEXP (XEXP (x, 1), 1)));
17772 asm_fprintf (stream, "%r]!",
17773 REGNO (XEXP (XEXP (x, 1), 1)));
17775 else if (GET_CODE (x) == POST_MODIFY)
17777 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17778 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17779 asm_fprintf (stream, "#%wd",
17780 INTVAL (XEXP (XEXP (x, 1), 1)));
17782 asm_fprintf (stream, "%r",
17783 REGNO (XEXP (XEXP (x, 1), 1)));
17785 else output_addr_const (stream, x);
17789 if (GET_CODE (x) == REG)
17790 asm_fprintf (stream, "[%r]", REGNO (x));
17791 else if (GET_CODE (x) == POST_INC)
17792 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17793 else if (GET_CODE (x) == PLUS)
17795 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17796 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17797 asm_fprintf (stream, "[%r, #%wd]",
17798 REGNO (XEXP (x, 0)),
17799 INTVAL (XEXP (x, 1)));
17801 asm_fprintf (stream, "[%r, %r]",
17802 REGNO (XEXP (x, 0)),
17803 REGNO (XEXP (x, 1)));
17806 output_addr_const (stream, x);
17810 /* Target hook for indicating whether a punctuation character for
17811 TARGET_PRINT_OPERAND is valid. */
17813 arm_print_operand_punct_valid_p (unsigned char code)
17815 return (code == '@' || code == '|' || code == '.'
17816 || code == '(' || code == ')' || code == '#'
17817 || (TARGET_32BIT && (code == '?'))
17818 || (TARGET_THUMB2 && (code == '!'))
17819 || (TARGET_THUMB && (code == '_')));
17822 /* Target hook for assembling integer objects. The ARM version needs to
17823 handle word-sized values specially. */
17825 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17827 enum machine_mode mode;
17829 if (size == UNITS_PER_WORD && aligned_p)
17831 fputs ("\t.word\t", asm_out_file);
17832 output_addr_const (asm_out_file, x);
17834 /* Mark symbols as position independent. We only do this in the
17835 .text segment, not in the .data segment. */
17836 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17837 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17839 /* See legitimize_pic_address for an explanation of the
17840 TARGET_VXWORKS_RTP check. */
17841 if (TARGET_VXWORKS_RTP
17842 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17843 fputs ("(GOT)", asm_out_file);
17845 fputs ("(GOTOFF)", asm_out_file);
17847 fputc ('\n', asm_out_file);
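/* E.g. (illustration): with -fPIC, a constant-table reference to a
   non-local symbol is emitted as "\t.word\tfoo(GOT)", while a local
   symbol gets "(GOTOFF)".  */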
17851 mode = GET_MODE (x);
17853 if (arm_vector_mode_supported_p (mode))
17857 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17859 units = CONST_VECTOR_NUNITS (x);
17860 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17862 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17863 for (i = 0; i < units; i++)
17865 rtx elt = CONST_VECTOR_ELT (x, i);
17866 assemble_integer
17867 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17870 for (i = 0; i < units; i++)
17872 rtx elt = CONST_VECTOR_ELT (x, i);
17873 REAL_VALUE_TYPE rval;
17875 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17877 assemble_real
17878 (rval, GET_MODE_INNER (mode),
17879 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17885 return default_assemble_integer (x, size, aligned_p);
17889 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17893 if (!TARGET_AAPCS_BASED)
17895 (is_ctor ?
17896 default_named_section_asm_out_constructor
17897 : default_named_section_asm_out_destructor) (symbol, priority);
17898 return;
17901 /* Put these in the .init_array section, using a special relocation. */
17902 if (priority != DEFAULT_INIT_PRIORITY)
17905 sprintf (buf, "%s.%.5u",
17906 is_ctor ? ".init_array" : ".fini_array",
17907 priority);
17908 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17915 switch_to_section (s);
17916 assemble_align (POINTER_SIZE);
17917 fputs ("\t.word\t", asm_out_file);
17918 output_addr_const (asm_out_file, symbol);
17919 fputs ("(target1)\n", asm_out_file);
17922 /* Add a function to the list of static constructors. */
17925 arm_elf_asm_constructor (rtx symbol, int priority)
17927 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17930 /* Add a function to the list of static destructors. */
17933 arm_elf_asm_destructor (rtx symbol, int priority)
17935 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17938 /* A finite state machine takes care of noticing whether or not instructions
17939 can be conditionally executed, and can thus decrease execution time and code
17940 size by deleting branch instructions. The fsm is controlled by
17941 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17943 /* The states of the fsm controlling condition codes are:
17944 0: normal, do nothing special
17945 1: make ASM_OUTPUT_OPCODE not output this instruction
17946 2: make ASM_OUTPUT_OPCODE not output this instruction
17947 3: make instructions conditional
17948 4: make instructions conditional
17950 State transitions (state->state by whom under condition):
17951 0 -> 1 final_prescan_insn if the `target' is a label
17952 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17953 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17954 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17955 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17956 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17957 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17958 (the target insn is arm_target_insn).
17960 If the jump clobbers the conditions then we use states 2 and 4.
17962 A similar thing can be done with conditional return insns.
17964 XXX In case the `target' is an unconditional branch, this conditionalising
17965 of the instructions always reduces code size, but not always execution
17966 time. But then, I want to reduce the code size to somewhere near what
17967 /bin/cc produces. */
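/* A concrete illustration of the state-1/state-3 path: a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the conditional branch is suppressed and the skipped instruction is
   executed under the inverse condition.  */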
17969 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17970 instructions. When a COND_EXEC instruction is seen the subsequent
17971 instructions are scanned so that multiple conditional instructions can be
17972 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17973 specify the length and true/false mask for the IT block. These will be
17974 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
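/* For example (illustration), the pair

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   is a single IT block: arm_current_cc is ARM_EQ, arm_condexec_masklen is 2,
   and arm_condexec_mask has bit 0 set (first insn matches the condition) and
   bit 1 clear (second insn takes the inverse), which
   thumb2_asm_output_opcode below prints as "ite eq".  */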
17976 /* Returns the index of the ARM condition code string in
17977 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17978 COMPARISON should be an rtx like `(eq (...) (...))'. */
17981 maybe_get_arm_condition_code (rtx comparison)
17983 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17984 enum arm_cond_code code;
17985 enum rtx_code comp_code = GET_CODE (comparison);
17987 if (GET_MODE_CLASS (mode) != MODE_CC)
17988 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17989 XEXP (comparison, 1));
17993 case CC_DNEmode: code = ARM_NE; goto dominance;
17994 case CC_DEQmode: code = ARM_EQ; goto dominance;
17995 case CC_DGEmode: code = ARM_GE; goto dominance;
17996 case CC_DGTmode: code = ARM_GT; goto dominance;
17997 case CC_DLEmode: code = ARM_LE; goto dominance;
17998 case CC_DLTmode: code = ARM_LT; goto dominance;
17999 case CC_DGEUmode: code = ARM_CS; goto dominance;
18000 case CC_DGTUmode: code = ARM_HI; goto dominance;
18001 case CC_DLEUmode: code = ARM_LS; goto dominance;
18002 case CC_DLTUmode: code = ARM_CC;
18005 if (comp_code == EQ)
18006 return ARM_INVERSE_CONDITION_CODE (code);
18007 if (comp_code == NE)
18014 case NE: return ARM_NE;
18015 case EQ: return ARM_EQ;
18016 case GE: return ARM_PL;
18017 case LT: return ARM_MI;
18018 default: return ARM_NV;
18024 case NE: return ARM_NE;
18025 case EQ: return ARM_EQ;
18026 default: return ARM_NV;
18032 case NE: return ARM_MI;
18033 case EQ: return ARM_PL;
18034 default: return ARM_NV;
18039 /* These encodings assume that AC=1 in the FPA system control
18040 byte. This allows us to handle all cases except UNEQ and
18041 LTGT. */
18044 case GE: return ARM_GE;
18045 case GT: return ARM_GT;
18046 case LE: return ARM_LS;
18047 case LT: return ARM_MI;
18048 case NE: return ARM_NE;
18049 case EQ: return ARM_EQ;
18050 case ORDERED: return ARM_VC;
18051 case UNORDERED: return ARM_VS;
18052 case UNLT: return ARM_LT;
18053 case UNLE: return ARM_LE;
18054 case UNGT: return ARM_HI;
18055 case UNGE: return ARM_PL;
18056 /* UNEQ and LTGT do not have a representation. */
18057 case UNEQ: /* Fall through. */
18058 case LTGT: /* Fall through. */
18059 default: return ARM_NV;
18065 case NE: return ARM_NE;
18066 case EQ: return ARM_EQ;
18067 case GE: return ARM_LE;
18068 case GT: return ARM_LT;
18069 case LE: return ARM_GE;
18070 case LT: return ARM_GT;
18071 case GEU: return ARM_LS;
18072 case GTU: return ARM_CC;
18073 case LEU: return ARM_CS;
18074 case LTU: return ARM_HI;
18075 default: return ARM_NV;
18081 case LTU: return ARM_CS;
18082 case GEU: return ARM_CC;
18083 default: return ARM_NV;
18089 case NE: return ARM_NE;
18090 case EQ: return ARM_EQ;
18091 case GEU: return ARM_CS;
18092 case GTU: return ARM_HI;
18093 case LEU: return ARM_LS;
18094 case LTU: return ARM_CC;
18095 default: return ARM_NV;
18101 case GE: return ARM_GE;
18102 case LT: return ARM_LT;
18103 case GEU: return ARM_CS;
18104 case LTU: return ARM_CC;
18105 default: return ARM_NV;
18111 case NE: return ARM_NE;
18112 case EQ: return ARM_EQ;
18113 case GE: return ARM_GE;
18114 case GT: return ARM_GT;
18115 case LE: return ARM_LE;
18116 case LT: return ARM_LT;
18117 case GEU: return ARM_CS;
18118 case GTU: return ARM_HI;
18119 case LEU: return ARM_LS;
18120 case LTU: return ARM_CC;
18121 default: return ARM_NV;
18124 default: gcc_unreachable ();
18128 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18129 static enum arm_cond_code
18130 get_arm_condition_code (rtx comparison)
18132 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18133 gcc_assert (code != ARM_NV);
18137 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18138 instructions. */
18140 thumb2_final_prescan_insn (rtx insn)
18142 rtx first_insn = insn;
18143 rtx body = PATTERN (insn);
18145 enum arm_cond_code code;
18149 /* Remove the previous insn from the count of insns to be output. */
18150 if (arm_condexec_count)
18151 arm_condexec_count--;
18153 /* Nothing to do if we are already inside a conditional block. */
18154 if (arm_condexec_count)
18157 if (GET_CODE (body) != COND_EXEC)
18160 /* Conditional jumps are implemented directly. */
18161 if (GET_CODE (insn) == JUMP_INSN)
18164 predicate = COND_EXEC_TEST (body);
18165 arm_current_cc = get_arm_condition_code (predicate);
18167 n = get_attr_ce_count (insn);
18168 arm_condexec_count = 1;
18169 arm_condexec_mask = (1 << n) - 1;
18170 arm_condexec_masklen = n;
18171 /* See if subsequent instructions can be combined into the same block. */
18174 insn = next_nonnote_insn (insn);
18176 /* Jumping into the middle of an IT block is illegal, so a label or
18177 barrier terminates the block. */
18178 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
18181 body = PATTERN (insn);
18182 /* USE and CLOBBER aren't really insns, so just skip them. */
18183 if (GET_CODE (body) == USE
18184 || GET_CODE (body) == CLOBBER)
18187 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18188 if (GET_CODE (body) != COND_EXEC)
18190 /* Allow up to 4 conditionally executed instructions in a block. */
18191 n = get_attr_ce_count (insn);
18192 if (arm_condexec_masklen + n > 4)
18195 predicate = COND_EXEC_TEST (body);
18196 code = get_arm_condition_code (predicate);
18197 mask = (1 << n) - 1;
18198 if (arm_current_cc == code)
18199 arm_condexec_mask |= (mask << arm_condexec_masklen);
18200 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18203 arm_condexec_count++;
18204 arm_condexec_masklen += n;
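/* Bookkeeping sketch: an insn whose condition is the inverse of
   arm_current_cc leaves a 0 bit at its position in arm_condexec_mask, so the
   eventual IT mnemonic gains an 'e' ("ite", "itee", ...) for it.  */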
18206 /* A jump must be the last instruction in a conditional block. */
18207 if (GET_CODE(insn) == JUMP_INSN)
18210 /* Restore recog_data (getting the attributes of other insns can
18211 destroy this array, but final.c assumes that it remains intact
18212 across this call). */
18213 extract_constrain_insn_cached (first_insn);
18217 arm_final_prescan_insn (rtx insn)
18219 /* BODY will hold the body of INSN. */
18220 rtx body = PATTERN (insn);
18222 /* This will be 1 if trying to repeat the trick, and things need to be
18223 reversed if it appears to fail. */
18226 /* If we start with a return insn, we only succeed if we find another one. */
18227 int seeking_return = 0;
18228 enum rtx_code return_code = UNKNOWN;
18230 /* START_INSN will hold the insn from where we start looking. This is the
18231 first insn after the following code_label if REVERSE is true. */
18232 rtx start_insn = insn;
18234 /* If in state 4, check if the target branch is reached, in order to
18235 change back to state 0. */
18236 if (arm_ccfsm_state == 4)
18238 if (insn == arm_target_insn)
18240 arm_target_insn = NULL;
18241 arm_ccfsm_state = 0;
18246 /* If in state 3, it is possible to repeat the trick, if this insn is an
18247 unconditional branch to a label, and immediately following this branch
18248 is the previous target label which is only used once, and the label this
18249 branch jumps to is not too far off. */
18250 if (arm_ccfsm_state == 3)
18252 if (simplejump_p (insn))
18254 start_insn = next_nonnote_insn (start_insn);
18255 if (GET_CODE (start_insn) == BARRIER)
18257 /* XXX Isn't this always a barrier? */
18258 start_insn = next_nonnote_insn (start_insn);
18260 if (GET_CODE (start_insn) == CODE_LABEL
18261 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18262 && LABEL_NUSES (start_insn) == 1)
18267 else if (ANY_RETURN_P (body))
18269 start_insn = next_nonnote_insn (start_insn);
18270 if (GET_CODE (start_insn) == BARRIER)
18271 start_insn = next_nonnote_insn (start_insn);
18272 if (GET_CODE (start_insn) == CODE_LABEL
18273 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18274 && LABEL_NUSES (start_insn) == 1)
18277 seeking_return = 1;
18278 return_code = GET_CODE (body);
18287 gcc_assert (!arm_ccfsm_state || reverse);
18288 if (GET_CODE (insn) != JUMP_INSN)
18291 /* This jump might be paralleled with a clobber of the condition codes;
18292 the jump should always come first. */
18293 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18294 body = XVECEXP (body, 0, 0);
18296 if (reverse
18297 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18298 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18301 int fail = FALSE, succeed = FALSE;
18302 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18303 int then_not_else = TRUE;
18304 rtx this_insn = start_insn, label = 0;
18306 /* Register the insn jumped to. */
18309 if (!seeking_return)
18310 label = XEXP (SET_SRC (body), 0);
18312 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18313 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18314 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18316 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18317 then_not_else = FALSE;
18319 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18321 seeking_return = 1;
18322 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18324 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18326 seeking_return = 1;
18327 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18328 then_not_else = FALSE;
18331 gcc_unreachable ();
18333 /* See how many insns this branch skips, and what kind of insns. If all
18334 insns are okay, and the label or unconditional branch to the same
18335 label is not too far away, succeed. */
18336 for (insns_skipped = 0;
18337 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18341 this_insn = next_nonnote_insn (this_insn);
18345 switch (GET_CODE (this_insn))
18348 /* Succeed if it is the target label, otherwise fail since
18349 control falls in from somewhere else. */
18350 if (this_insn == label)
18352 arm_ccfsm_state = 1;
18360 /* Succeed if the following insn is the target label.
18362 If return insns are used then the last insn in a function
18363 will be a barrier. */
18364 this_insn = next_nonnote_insn (this_insn);
18365 if (this_insn && this_insn == label)
18367 arm_ccfsm_state = 1;
18375 /* The AAPCS says that conditional calls should not be
18376 used since they make interworking inefficient (the
18377 linker can't transform BL<cond> into BLX). That's
18378 only a problem if the machine has BLX. */
18385 /* Succeed if the following insn is the target label, or
18386 if the following two insns are a barrier and the
18387 target label. */
18388 this_insn = next_nonnote_insn (this_insn);
18389 if (this_insn && GET_CODE (this_insn) == BARRIER)
18390 this_insn = next_nonnote_insn (this_insn);
18392 if (this_insn && this_insn == label
18393 && insns_skipped < max_insns_skipped)
18395 arm_ccfsm_state = 1;
18403 /* If this is an unconditional branch to the same label, succeed.
18404 If it is to another label, do nothing. If it is conditional,
18405 fail. */
18406 /* XXX Probably, the tests for SET and the PC are
18407 unnecessary. */
18409 scanbody = PATTERN (this_insn);
18410 if (GET_CODE (scanbody) == SET
18411 && GET_CODE (SET_DEST (scanbody)) == PC)
18413 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18414 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18416 arm_ccfsm_state = 2;
18419 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18422 /* Fail if a conditional return is undesirable (e.g. on a
18423 StrongARM), but still allow this if optimizing for size. */
18424 else if (GET_CODE (scanbody) == return_code
18425 && !use_return_insn (TRUE, NULL)
18428 else if (GET_CODE (scanbody) == return_code)
18430 arm_ccfsm_state = 2;
18433 else if (GET_CODE (scanbody) == PARALLEL)
18435 switch (get_attr_conds (this_insn))
18445 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18450 /* Instructions using or affecting the condition codes make it
18451 fail. */
18452 scanbody = PATTERN (this_insn);
18453 if (!(GET_CODE (scanbody) == SET
18454 || GET_CODE (scanbody) == PARALLEL)
18455 || get_attr_conds (this_insn) != CONDS_NOCOND)
18458 /* A conditional Cirrus instruction must be followed by
18459 a non-Cirrus instruction. However, since we
18460 conditionalize instructions in this function, and since
18461 by the time we get here we can no longer add
18462 instructions (nops), because shorten_branches() has
18463 already been called, we disable conditionalizing
18464 Cirrus instructions to be safe. */
18465 if (GET_CODE (scanbody) != USE
18466 && GET_CODE (scanbody) != CLOBBER
18467 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18477 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18478 arm_target_label = CODE_LABEL_NUMBER (label);
18481 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18483 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18485 this_insn = next_nonnote_insn (this_insn);
18486 gcc_assert (!this_insn
18487 || (GET_CODE (this_insn) != BARRIER
18488 && GET_CODE (this_insn) != CODE_LABEL));
18492 /* Oh dear! We ran off the end; give up. */
18493 extract_constrain_insn_cached (insn);
18494 arm_ccfsm_state = 0;
18495 arm_target_insn = NULL;
18498 arm_target_insn = this_insn;
18501 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18502 what it was. */
18504 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18506 if (reverse || then_not_else)
18507 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18510 /* Restore recog_data (getting the attributes of other insns can
18511 destroy this array, but final.c assumes that it remains intact
18512 across this call). */
18513 extract_constrain_insn_cached (insn);
18517 /* Output IT instructions. */
18519 thumb2_asm_output_opcode (FILE * stream)
18524 if (arm_condexec_mask)
18526 for (n = 0; n < arm_condexec_masklen; n++)
18527 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18529 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18530 arm_condition_codes[arm_current_cc]);
18531 arm_condexec_mask = 0;
18535 /* Returns true if REGNO is a valid register
18536 for holding a quantity of type MODE. */
18538 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18540 if (GET_MODE_CLASS (mode) == MODE_CC)
18541 return (regno == CC_REGNUM
18542 || (TARGET_HARD_FLOAT && TARGET_VFP
18543 && regno == VFPCC_REGNUM));
18546 /* For the Thumb we only allow values bigger than SImode in
18547 registers 0 - 6, so that there is always a second low
18548 register available to hold the upper part of the value.
18549 We probably ought to ensure that the register is the
18550 start of an even numbered register pair. */
18551 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18553 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18554 && IS_CIRRUS_REGNUM (regno))
18555 /* We have outlawed SI values in Cirrus registers because they
18556 reside in the lower 32 bits, but SF values reside in the
18557 upper 32 bits. This causes gcc all sorts of grief. We can't
18558 even split the registers into pairs because Cirrus SI values
18559 get sign extended to 64 bits -- aldyh. */
18560 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18562 if (TARGET_HARD_FLOAT && TARGET_VFP
18563 && IS_VFP_REGNUM (regno))
18565 if (mode == SFmode || mode == SImode)
18566 return VFP_REGNO_OK_FOR_SINGLE (regno);
18568 if (mode == DFmode)
18569 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18571 /* VFP registers can hold HFmode values, but there is no point in
18572 putting them there unless we have hardware conversion insns. */
18573 if (mode == HFmode)
18574 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18577 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18578 || (VALID_NEON_QREG_MODE (mode)
18579 && NEON_REGNO_OK_FOR_QUAD (regno))
18580 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18581 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18582 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18583 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18584 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18589 if (TARGET_REALLY_IWMMXT)
18591 if (IS_IWMMXT_GR_REGNUM (regno))
18592 return mode == SImode;
18594 if (IS_IWMMXT_REGNUM (regno))
18595 return VALID_IWMMXT_REG_MODE (mode);
18598 /* We allow almost any value to be stored in the general registers.
18599 Restrict doubleword quantities to even register pairs so that we can
18600 use ldrd. Do not allow very large Neon structure opaque modes in
18601 general registers; they would use too many. */
18602 if (regno <= LAST_ARM_REGNUM)
18603 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18604 && ARM_NUM_REGS (mode) <= 4;
18606 if (regno == FRAME_POINTER_REGNUM
18607 || regno == ARG_POINTER_REGNUM)
18608 /* We only allow integers in the fake hard registers. */
18609 return GET_MODE_CLASS (mode) == MODE_INT;
18611 /* The only registers left are the FPA registers
18612 which we only allow to hold FP values. */
18613 return (TARGET_HARD_FLOAT && TARGET_FPA
18614 && GET_MODE_CLASS (mode) == MODE_FLOAT
18615 && regno >= FIRST_FPA_REGNUM
18616 && regno <= LAST_FPA_REGNUM);
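/* Concrete illustration: with TARGET_LDRD a DImode value may start at r0 or
   r2 but not at an odd register such as r1, so a 64-bit access can always
   use a single ldrd/strd; without TARGET_LDRD any start register passing
   the ARM_NUM_REGS check is acceptable.  */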
18619 /* Implement MODES_TIEABLE_P. */
18622 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18624 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18627 /* We specifically want to allow elements of "structure" modes to
18628 be tieable to the structure. This more general condition allows
18629 other rarer situations too. */
18631 && (VALID_NEON_DREG_MODE (mode1)
18632 || VALID_NEON_QREG_MODE (mode1)
18633 || VALID_NEON_STRUCT_MODE (mode1))
18634 && (VALID_NEON_DREG_MODE (mode2)
18635 || VALID_NEON_QREG_MODE (mode2)
18636 || VALID_NEON_STRUCT_MODE (mode2)))
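/* E.g. with NEON enabled, V16QImode and OImode (an opaque two-quad-register
   structure mode) are tieable here, so a subreg of one in the other mode
   does not force a copy through memory.  (Illustration only.)  */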
18642 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18643 not used in ARM mode. */
18646 arm_regno_class (int regno)
18650 if (regno == STACK_POINTER_REGNUM)
18652 if (regno == CC_REGNUM)
18659 if (TARGET_THUMB2 && regno < 8)
18660 return LO_REGS;
18662 if ( regno <= LAST_ARM_REGNUM
18663 || regno == FRAME_POINTER_REGNUM
18664 || regno == ARG_POINTER_REGNUM)
18665 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18667 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18668 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18670 if (IS_CIRRUS_REGNUM (regno))
18671 return CIRRUS_REGS;
18673 if (IS_VFP_REGNUM (regno))
18675 if (regno <= D7_VFP_REGNUM)
18676 return VFP_D0_D7_REGS;
18677 else if (regno <= LAST_LO_VFP_REGNUM)
18678 return VFP_LO_REGS;
18680 return VFP_HI_REGS;
18683 if (IS_IWMMXT_REGNUM (regno))
18684 return IWMMXT_REGS;
18686 if (IS_IWMMXT_GR_REGNUM (regno))
18687 return IWMMXT_GR_REGS;
18692 /* Handle a special case when computing the offset
18693 of an argument from the frame pointer. */
18695 arm_debugger_arg_offset (int value, rtx addr)
18699 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
18703 /* We can only cope with the case where the address is held in a register. */
18704 if (GET_CODE (addr) != REG)
18707 /* If we are using the frame pointer to point at the argument, then
18708 an offset of 0 is correct. */
18709 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18712 /* If we are using the stack pointer to point at the
18713 argument, then an offset of 0 is correct. */
18714 /* ??? Check this is consistent with thumb2 frame layout. */
18715 if ((TARGET_THUMB || !frame_pointer_needed)
18716 && REGNO (addr) == SP_REGNUM)
18719 /* Oh dear. The argument is pointed to by a register rather
18720 than being held in a register, or being stored at a known
18721 offset from the frame pointer. Since GDB only understands
18722 those two kinds of argument we must translate the address
18723 held in the register into an offset from the frame pointer.
18724 We do this by searching through the insns for the function
18725 looking to see where this register gets its value. If the
18726 register is initialized from the frame pointer plus an offset
18727 then we are in luck and we can continue, otherwise we give up.
18729 This code is exercised by producing debugging information
18730 for a function with arguments like this:
18732 double func (double a, double b, int c, double d) {return d;}
18734 Without this code the stab for parameter 'd' will be set to
18735 an offset of 0 from the frame pointer, rather than 8. */
18737 /* The if() statement says:
18739 If the insn is a normal instruction
18740 and if the insn is setting the value in a register
18741 and if the register being set is the register holding the address of the argument
18742 and if the address is computed by an addition
18743 that involves adding to a register
18744 which is the frame pointer
18745 then... */
18749 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18751 if ( GET_CODE (insn) == INSN
18752 && GET_CODE (PATTERN (insn)) == SET
18753 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18754 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18755 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18756 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18757 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18760 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18769 warning (0, "unable to compute real location of stacked parameter");
18770 value = 8; /* XXX magic hack */
18790 T_MAX /* Size of enum. Keep last. */
18791 } neon_builtin_type_mode;
18793 #define TYPE_MODE_BIT(X) (1 << (X))
18795 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18796 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18797 | TYPE_MODE_BIT (T_DI))
18798 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18799 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18800 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
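/* Worked expansion (illustration): TYPE_MODE_BIT (T_V8QI) is 1 << T_V8QI, so
   TB_DREG is the bit-set of all 64-bit ("doubleword") vector modes and
   TB_QREG the bit-set of all 128-bit ("quadword") ones.  */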
18802 #define v8qi_UP T_V8QI
18803 #define v4hi_UP T_V4HI
18804 #define v2si_UP T_V2SI
18805 #define v2sf_UP T_V2SF
18807 #define v16qi_UP T_V16QI
18808 #define v8hi_UP T_V8HI
18809 #define v4si_UP T_V4SI
18810 #define v4sf_UP T_V4SF
18811 #define v2di_UP T_V2DI
18816 #define UP(X) X##_UP
18849 NEON_LOADSTRUCTLANE,
18851 NEON_STORESTRUCTLANE,
18860 const neon_itype itype;
18861 const neon_builtin_type_mode mode;
18862 const enum insn_code code;
18863 unsigned int fcode;
18864 } neon_builtin_datum;
18866 #define CF(N,X) CODE_FOR_neon_##N##X
18868 #define VAR1(T, N, A) \
18869 {#N, NEON_##T, UP (A), CF (N, A), 0}
18870 #define VAR2(T, N, A, B) \
18871 VAR1 (T, N, A), \
18872 {#N, NEON_##T, UP (B), CF (N, B), 0}
18873 #define VAR3(T, N, A, B, C) \
18874 VAR2 (T, N, A, B), \
18875 {#N, NEON_##T, UP (C), CF (N, C), 0}
18876 #define VAR4(T, N, A, B, C, D) \
18877 VAR3 (T, N, A, B, C), \
18878 {#N, NEON_##T, UP (D), CF (N, D), 0}
18879 #define VAR5(T, N, A, B, C, D, E) \
18880 VAR4 (T, N, A, B, C, D), \
18881 {#N, NEON_##T, UP (E), CF (N, E), 0}
18882 #define VAR6(T, N, A, B, C, D, E, F) \
18883 VAR5 (T, N, A, B, C, D, E), \
18884 {#N, NEON_##T, UP (F), CF (N, F), 0}
18885 #define VAR7(T, N, A, B, C, D, E, F, G) \
18886 VAR6 (T, N, A, B, C, D, E, F), \
18887 {#N, NEON_##T, UP (G), CF (N, G), 0}
18888 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18889 VAR7 (T, N, A, B, C, D, E, F, G), \
18890 {#N, NEON_##T, UP (H), CF (N, H), 0}
18891 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18892 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18893 {#N, NEON_##T, UP (I), CF (N, I), 0}
18894 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18895 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18896 {#N, NEON_##T, UP (J), CF (N, J), 0}
18898 /* The mode entries in the following table correspond to the "key" type of the
18899 instruction variant, i.e. equivalent to that which would be specified after
18900 the assembler mnemonic, which usually refers to the last vector operand.
18901 (Signed/unsigned/polynomial types are not distinguished, though; they
18902 are all mapped onto the same mode for a given element size.) The modes
18903 listed per instruction should be the same as those defined for that
18904 instruction's pattern in neon.md. */
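/* For instance (illustration), the v4hi entry for vadd below corresponds to
   the variant written "vadd.i16 d0, d1, d2" in assembler: 16-bit elements
   in a 64-bit vector, matching the V4HImode add pattern in neon.md.  */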
18906 static neon_builtin_datum neon_builtin_data[] =
18908 VAR10 (BINOP, vadd,
18909 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18910 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18911 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18912 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18913 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18914 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18915 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18916 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18917 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18918 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18919 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18920 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18921 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18922 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18923 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18924 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18925 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18926 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18927 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18928 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18929 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18930 VAR2 (BINOP, vqdmull, v4hi, v2si),
18931 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18932 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18933 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18934 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18935 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18936 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18937 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18938 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18939 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18940 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18941 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18942 VAR10 (BINOP, vsub,
18943 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18944 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18945 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18946 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18947 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18948 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18949 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18950 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18951 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18952 VAR2 (BINOP, vcage, v2sf, v4sf),
18953 VAR2 (BINOP, vcagt, v2sf, v4sf),
18954 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18955 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18956 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18957 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18958 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18959 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18960 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18961 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18962 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18963 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18964 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18965 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18966 VAR2 (BINOP, vrecps, v2sf, v4sf),
18967 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18968 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18969 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18970 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18971 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18972 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18973 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18974 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18975 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18976 VAR2 (UNOP, vcnt, v8qi, v16qi),
18977 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18978 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18979 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18980 /* FIXME: vget_lane supports more variants than this! */
18981 VAR10 (GETLANE, vget_lane,
18982 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18983 VAR10 (SETLANE, vset_lane,
18984 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18985 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18986 VAR10 (DUP, vdup_n,
18987 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18988 VAR10 (DUPLANE, vdup_lane,
18989 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18990 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18991 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18992 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18993 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18994 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18995 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18996 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18997 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18998 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18999 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19000 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19001 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19002 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19003 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19004 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19005 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19006 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19007 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19008 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19009 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19010 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19011 VAR10 (BINOP, vext,
19012 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19013 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19014 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19015 VAR2 (UNOP, vrev16, v8qi, v16qi),
19016 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19017 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19018 VAR10 (SELECT, vbsl,
19019 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19020 VAR1 (VTBL, vtbl1, v8qi),
19021 VAR1 (VTBL, vtbl2, v8qi),
19022 VAR1 (VTBL, vtbl3, v8qi),
19023 VAR1 (VTBL, vtbl4, v8qi),
19024 VAR1 (VTBX, vtbx1, v8qi),
19025 VAR1 (VTBX, vtbx2, v8qi),
19026 VAR1 (VTBX, vtbx3, v8qi),
19027 VAR1 (VTBX, vtbx4, v8qi),
19028 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19029 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19030 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19031 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19032 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19033 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19034 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19035 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19036 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19037 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19038 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19039 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19040 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19041 VAR10 (LOAD1, vld1,
19042 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19043 VAR10 (LOAD1LANE, vld1_lane,
19044 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19045 VAR10 (LOAD1, vld1_dup,
19046 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19047 VAR10 (STORE1, vst1,
19048 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19049 VAR10 (STORE1LANE, vst1_lane,
19050 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19051 VAR9 (LOADSTRUCT,
19052 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19053 VAR7 (LOADSTRUCTLANE, vld2_lane,
19054 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19055 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19056 VAR9 (STORESTRUCT, vst2,
19057 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19058 VAR7 (STORESTRUCTLANE, vst2_lane,
19059 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19060 VAR9 (LOADSTRUCT,
19061 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19062 VAR7 (LOADSTRUCTLANE, vld3_lane,
19063 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19064 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19065 VAR9 (STORESTRUCT, vst3,
19066 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19067 VAR7 (STORESTRUCTLANE, vst3_lane,
19068 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19069 VAR9 (LOADSTRUCT, vld4,
19070 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19071 VAR7 (LOADSTRUCTLANE, vld4_lane,
19072 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19073 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19074 VAR9 (STORESTRUCT, vst4,
19075 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19076 VAR7 (STORESTRUCTLANE, vst4_lane,
19077 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19078 VAR10 (LOGICBINOP, vand,
19079 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19080 VAR10 (LOGICBINOP, vorr,
19081 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19082 VAR10 (BINOP, veor,
19083 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19084 VAR10 (LOGICBINOP, vbic,
19085 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19086 VAR10 (LOGICBINOP, vorn,
19087 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19102 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19103 symbolic names defined here (which would require too much duplication). */
19107 ARM_BUILTIN_GETWCX,
19108 ARM_BUILTIN_SETWCX,
19112 ARM_BUILTIN_WAVG2BR,
19113 ARM_BUILTIN_WAVG2HR,
19114 ARM_BUILTIN_WAVG2B,
19115 ARM_BUILTIN_WAVG2H,
19122 ARM_BUILTIN_WMACSZ,
19124 ARM_BUILTIN_WMACUZ,
19127 ARM_BUILTIN_WSADBZ,
19129 ARM_BUILTIN_WSADHZ,
19131 ARM_BUILTIN_WALIGN,
19134 ARM_BUILTIN_TMIAPH,
19135 ARM_BUILTIN_TMIABB,
19136 ARM_BUILTIN_TMIABT,
19137 ARM_BUILTIN_TMIATB,
19138 ARM_BUILTIN_TMIATT,
19140 ARM_BUILTIN_TMOVMSKB,
19141 ARM_BUILTIN_TMOVMSKH,
19142 ARM_BUILTIN_TMOVMSKW,
19144 ARM_BUILTIN_TBCSTB,
19145 ARM_BUILTIN_TBCSTH,
19146 ARM_BUILTIN_TBCSTW,
19148 ARM_BUILTIN_WMADDS,
19149 ARM_BUILTIN_WMADDU,
19151 ARM_BUILTIN_WPACKHSS,
19152 ARM_BUILTIN_WPACKWSS,
19153 ARM_BUILTIN_WPACKDSS,
19154 ARM_BUILTIN_WPACKHUS,
19155 ARM_BUILTIN_WPACKWUS,
19156 ARM_BUILTIN_WPACKDUS,
19161 ARM_BUILTIN_WADDSSB,
19162 ARM_BUILTIN_WADDSSH,
19163 ARM_BUILTIN_WADDSSW,
19164 ARM_BUILTIN_WADDUSB,
19165 ARM_BUILTIN_WADDUSH,
19166 ARM_BUILTIN_WADDUSW,
19170 ARM_BUILTIN_WSUBSSB,
19171 ARM_BUILTIN_WSUBSSH,
19172 ARM_BUILTIN_WSUBSSW,
19173 ARM_BUILTIN_WSUBUSB,
19174 ARM_BUILTIN_WSUBUSH,
19175 ARM_BUILTIN_WSUBUSW,
19182 ARM_BUILTIN_WCMPEQB,
19183 ARM_BUILTIN_WCMPEQH,
19184 ARM_BUILTIN_WCMPEQW,
19185 ARM_BUILTIN_WCMPGTUB,
19186 ARM_BUILTIN_WCMPGTUH,
19187 ARM_BUILTIN_WCMPGTUW,
19188 ARM_BUILTIN_WCMPGTSB,
19189 ARM_BUILTIN_WCMPGTSH,
19190 ARM_BUILTIN_WCMPGTSW,
19192 ARM_BUILTIN_TEXTRMSB,
19193 ARM_BUILTIN_TEXTRMSH,
19194 ARM_BUILTIN_TEXTRMSW,
19195 ARM_BUILTIN_TEXTRMUB,
19196 ARM_BUILTIN_TEXTRMUH,
19197 ARM_BUILTIN_TEXTRMUW,
19198 ARM_BUILTIN_TINSRB,
19199 ARM_BUILTIN_TINSRH,
19200 ARM_BUILTIN_TINSRW,
19202 ARM_BUILTIN_WMAXSW,
19203 ARM_BUILTIN_WMAXSH,
19204 ARM_BUILTIN_WMAXSB,
19205 ARM_BUILTIN_WMAXUW,
19206 ARM_BUILTIN_WMAXUH,
19207 ARM_BUILTIN_WMAXUB,
19208 ARM_BUILTIN_WMINSW,
19209 ARM_BUILTIN_WMINSH,
19210 ARM_BUILTIN_WMINSB,
19211 ARM_BUILTIN_WMINUW,
19212 ARM_BUILTIN_WMINUH,
19213 ARM_BUILTIN_WMINUB,
19215 ARM_BUILTIN_WMULUM,
19216 ARM_BUILTIN_WMULSM,
19217 ARM_BUILTIN_WMULUL,
19219 ARM_BUILTIN_PSADBH,
19220 ARM_BUILTIN_WSHUFH,
19234 ARM_BUILTIN_WSLLHI,
19235 ARM_BUILTIN_WSLLWI,
19236 ARM_BUILTIN_WSLLDI,
19237 ARM_BUILTIN_WSRAHI,
19238 ARM_BUILTIN_WSRAWI,
19239 ARM_BUILTIN_WSRADI,
19240 ARM_BUILTIN_WSRLHI,
19241 ARM_BUILTIN_WSRLWI,
19242 ARM_BUILTIN_WSRLDI,
19243 ARM_BUILTIN_WRORHI,
19244 ARM_BUILTIN_WRORWI,
19245 ARM_BUILTIN_WRORDI,
19247 ARM_BUILTIN_WUNPCKIHB,
19248 ARM_BUILTIN_WUNPCKIHH,
19249 ARM_BUILTIN_WUNPCKIHW,
19250 ARM_BUILTIN_WUNPCKILB,
19251 ARM_BUILTIN_WUNPCKILH,
19252 ARM_BUILTIN_WUNPCKILW,
19254 ARM_BUILTIN_WUNPCKEHSB,
19255 ARM_BUILTIN_WUNPCKEHSH,
19256 ARM_BUILTIN_WUNPCKEHSW,
19257 ARM_BUILTIN_WUNPCKEHUB,
19258 ARM_BUILTIN_WUNPCKEHUH,
19259 ARM_BUILTIN_WUNPCKEHUW,
19260 ARM_BUILTIN_WUNPCKELSB,
19261 ARM_BUILTIN_WUNPCKELSH,
19262 ARM_BUILTIN_WUNPCKELSW,
19263 ARM_BUILTIN_WUNPCKELUB,
19264 ARM_BUILTIN_WUNPCKELUH,
19265 ARM_BUILTIN_WUNPCKELUW,
19267 ARM_BUILTIN_THREAD_POINTER,
19269 ARM_BUILTIN_NEON_BASE,
19271 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19274 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19277 arm_init_neon_builtins (void)
19279 unsigned int i, fcode;
19282 tree neon_intQI_type_node;
19283 tree neon_intHI_type_node;
19284 tree neon_polyQI_type_node;
19285 tree neon_polyHI_type_node;
19286 tree neon_intSI_type_node;
19287 tree neon_intDI_type_node;
19288 tree neon_float_type_node;
19290 tree intQI_pointer_node;
19291 tree intHI_pointer_node;
19292 tree intSI_pointer_node;
19293 tree intDI_pointer_node;
19294 tree float_pointer_node;
19296 tree const_intQI_node;
19297 tree const_intHI_node;
19298 tree const_intSI_node;
19299 tree const_intDI_node;
19300 tree const_float_node;
19302 tree const_intQI_pointer_node;
19303 tree const_intHI_pointer_node;
19304 tree const_intSI_pointer_node;
19305 tree const_intDI_pointer_node;
19306 tree const_float_pointer_node;
19308 tree V8QI_type_node;
19309 tree V4HI_type_node;
19310 tree V2SI_type_node;
19311 tree V2SF_type_node;
19312 tree V16QI_type_node;
19313 tree V8HI_type_node;
19314 tree V4SI_type_node;
19315 tree V4SF_type_node;
19316 tree V2DI_type_node;
19318 tree intUQI_type_node;
19319 tree intUHI_type_node;
19320 tree intUSI_type_node;
19321 tree intUDI_type_node;
19323 tree intEI_type_node;
19324 tree intOI_type_node;
19325 tree intCI_type_node;
19326 tree intXI_type_node;
19328 tree V8QI_pointer_node;
19329 tree V4HI_pointer_node;
19330 tree V2SI_pointer_node;
19331 tree V2SF_pointer_node;
19332 tree V16QI_pointer_node;
19333 tree V8HI_pointer_node;
19334 tree V4SI_pointer_node;
19335 tree V4SF_pointer_node;
19336 tree V2DI_pointer_node;
19338 tree void_ftype_pv8qi_v8qi_v8qi;
19339 tree void_ftype_pv4hi_v4hi_v4hi;
19340 tree void_ftype_pv2si_v2si_v2si;
19341 tree void_ftype_pv2sf_v2sf_v2sf;
19342 tree void_ftype_pdi_di_di;
19343 tree void_ftype_pv16qi_v16qi_v16qi;
19344 tree void_ftype_pv8hi_v8hi_v8hi;
19345 tree void_ftype_pv4si_v4si_v4si;
19346 tree void_ftype_pv4sf_v4sf_v4sf;
19347 tree void_ftype_pv2di_v2di_v2di;
19349 tree reinterp_ftype_dreg[5][5];
19350 tree reinterp_ftype_qreg[5][5];
19351 tree dreg_types[5], qreg_types[5];
19353 /* Create distinguished type nodes for NEON vector element types,
19354 and pointers to values of such types, so we can detect them later. */
19355 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19356 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19357 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19358 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19359 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19360 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19361 neon_float_type_node = make_node (REAL_TYPE);
19362 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19363 layout_type (neon_float_type_node);
19365 /* Define typedefs which exactly correspond to the modes we are basing vector
19366 types on. If you change these names you'll need to change
19367 the table used by arm_mangle_type too. */
19368 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19369 "__builtin_neon_qi");
19370 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19371 "__builtin_neon_hi");
19372 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19373 "__builtin_neon_si");
19374 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19375 "__builtin_neon_sf");
19376 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19377 "__builtin_neon_di");
19378 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19379 "__builtin_neon_poly8");
19380 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19381 "__builtin_neon_poly16");
19383 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19384 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19385 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19386 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19387 float_pointer_node = build_pointer_type (neon_float_type_node);
19389 /* Next create constant-qualified versions of the above types. */
19390 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19392 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19394 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19396 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19398 const_float_node = build_qualified_type (neon_float_type_node,
19401 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19402 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19403 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19404 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19405 const_float_pointer_node = build_pointer_type (const_float_node);
19407 /* Now create vector types based on our NEON element types. */
19408 /* 64-bit vectors. */
19410 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19412 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19414 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19416 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19417 /* 128-bit vectors. */
19419 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19421 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19423 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19425 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19427 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19429 /* Unsigned integer types for various mode sizes. */
19430 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19431 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19432 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19433 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19435 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19436 "__builtin_neon_uqi");
19437 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19438 "__builtin_neon_uhi");
19439 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19440 "__builtin_neon_usi");
19441 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19442 "__builtin_neon_udi");
19444 /* Opaque integer types for structures of vectors. */
19445 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19446 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19447 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19448 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19450 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19451 "__builtin_neon_ti");
19452 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19453 "__builtin_neon_ei");
19454 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19455 "__builtin_neon_oi");
19456 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19457 "__builtin_neon_ci");
19458 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19459 "__builtin_neon_xi");
19461 /* Pointers to vector types. */
19462 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19463 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19464 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19465 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19466 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19467 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19468 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19469 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19470 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19472 /* Operations which return results as pairs. */
19473 void_ftype_pv8qi_v8qi_v8qi =
19474 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19475 V8QI_type_node, NULL);
19476 void_ftype_pv4hi_v4hi_v4hi =
19477 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19478 V4HI_type_node, NULL);
19479 void_ftype_pv2si_v2si_v2si =
19480 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19481 V2SI_type_node, NULL);
19482 void_ftype_pv2sf_v2sf_v2sf =
19483 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19484 V2SF_type_node, NULL);
19485 void_ftype_pdi_di_di =
19486 build_function_type_list (void_type_node, intDI_pointer_node,
19487 neon_intDI_type_node, neon_intDI_type_node, NULL);
19488 void_ftype_pv16qi_v16qi_v16qi =
19489 build_function_type_list (void_type_node, V16QI_pointer_node,
19490 V16QI_type_node, V16QI_type_node, NULL);
19491 void_ftype_pv8hi_v8hi_v8hi =
19492 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19493 V8HI_type_node, NULL);
19494 void_ftype_pv4si_v4si_v4si =
19495 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19496 V4SI_type_node, NULL);
19497 void_ftype_pv4sf_v4sf_v4sf =
19498 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19499 V4SF_type_node, NULL);
19500 void_ftype_pv2di_v2di_v2di =
19501 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19502 V2DI_type_node, NULL);
19504 dreg_types[0] = V8QI_type_node;
19505 dreg_types[1] = V4HI_type_node;
19506 dreg_types[2] = V2SI_type_node;
19507 dreg_types[3] = V2SF_type_node;
19508 dreg_types[4] = neon_intDI_type_node;
19510 qreg_types[0] = V16QI_type_node;
19511 qreg_types[1] = V8HI_type_node;
19512 qreg_types[2] = V4SI_type_node;
19513 qreg_types[3] = V4SF_type_node;
19514 qreg_types[4] = V2DI_type_node;
19516 for (i = 0; i < 5; i++)
19519 for (j = 0; j < 5; j++)
19521 reinterp_ftype_dreg[i][j]
19522 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19523 reinterp_ftype_qreg[i][j]
19524 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
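/* E.g. reinterp_ftype_dreg[0][1] is the type "V8QI f (V4HI)", later used
   for __builtin_neon_vreinterpretv8qiv4hi.  (Illustration only.)  */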
19528 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19529 i < ARRAY_SIZE (neon_builtin_data);
19532 neon_builtin_datum *d = &neon_builtin_data[i];
19534 const char* const modenames[] = {
19535 "v8qi", "v4hi", "v2si", "v2sf", "di",
19536 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19541 int is_load = 0, is_store = 0;
19543 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19550 case NEON_LOAD1LANE:
19551 case NEON_LOADSTRUCT:
19552 case NEON_LOADSTRUCTLANE:
19554 /* Fall through. */
19556 case NEON_STORE1LANE:
19557 case NEON_STORESTRUCT:
19558 case NEON_STORESTRUCTLANE:
19561 /* Fall through. */
19564 case NEON_LOGICBINOP:
19565 case NEON_SHIFTINSERT:
19572 case NEON_SHIFTIMM:
19573 case NEON_SHIFTACC:
19579 case NEON_LANEMULL:
19580 case NEON_LANEMULH:
19582 case NEON_SCALARMUL:
19583 case NEON_SCALARMULL:
19584 case NEON_SCALARMULH:
19585 case NEON_SCALARMAC:
19591 tree return_type = void_type_node, args = void_list_node;
19593 /* Build a function type directly from the insn_data for
19594 this builtin. The build_function_type() function takes
19595 care of removing duplicates for us. */
19596 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19600 if (is_load && k == 1)
19602 /* Neon load patterns always have the memory
19603 operand in the operand 1 position. */
19604 gcc_assert (insn_data[d->code].operand[k].predicate
19605 == neon_struct_operand);
19611 eltype = const_intQI_pointer_node;
19616 eltype = const_intHI_pointer_node;
19621 eltype = const_intSI_pointer_node;
19626 eltype = const_float_pointer_node;
19631 eltype = const_intDI_pointer_node;
19634 default: gcc_unreachable ();
19637 else if (is_store && k == 0)
19639 /* Similarly, Neon store patterns use operand 0 as
19640 the memory location to store to. */
19641 gcc_assert (insn_data[d->code].operand[k].predicate
19642 == neon_struct_operand);
19648 eltype = intQI_pointer_node;
19653 eltype = intHI_pointer_node;
19658 eltype = intSI_pointer_node;
19663 eltype = float_pointer_node;
19668 eltype = intDI_pointer_node;
19671 default: gcc_unreachable ();
19676 switch (insn_data[d->code].operand[k].mode)
19678 case VOIDmode: eltype = void_type_node; break;
19680 case QImode: eltype = neon_intQI_type_node; break;
19681 case HImode: eltype = neon_intHI_type_node; break;
19682 case SImode: eltype = neon_intSI_type_node; break;
19683 case SFmode: eltype = neon_float_type_node; break;
19684 case DImode: eltype = neon_intDI_type_node; break;
19685 case TImode: eltype = intTI_type_node; break;
19686 case EImode: eltype = intEI_type_node; break;
19687 case OImode: eltype = intOI_type_node; break;
19688 case CImode: eltype = intCI_type_node; break;
19689 case XImode: eltype = intXI_type_node; break;
19690 /* 64-bit vectors. */
19691 case V8QImode: eltype = V8QI_type_node; break;
19692 case V4HImode: eltype = V4HI_type_node; break;
19693 case V2SImode: eltype = V2SI_type_node; break;
19694 case V2SFmode: eltype = V2SF_type_node; break;
19695 /* 128-bit vectors. */
19696 case V16QImode: eltype = V16QI_type_node; break;
19697 case V8HImode: eltype = V8HI_type_node; break;
19698 case V4SImode: eltype = V4SI_type_node; break;
19699 case V4SFmode: eltype = V4SF_type_node; break;
19700 case V2DImode: eltype = V2DI_type_node; break;
19701 default: gcc_unreachable ();
19705 if (k == 0 && !is_store)
19706 return_type = eltype;
19708 args = tree_cons (NULL_TREE, eltype, args);
19711 ftype = build_function_type (return_type, args);
19715 case NEON_RESULTPAIR:
19717 switch (insn_data[d->code].operand[1].mode)
19719 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19720 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19721 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19722 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19723 case DImode: ftype = void_ftype_pdi_di_di; break;
19724 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19725 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19726 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19727 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19728 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19729 default: gcc_unreachable ();
19734 case NEON_REINTERP:
19736 /* We iterate over 5 doubleword types, then 5 quadword
19737 types. */
19738 int rhs = d->mode % 5;
19739 switch (insn_data[d->code].operand[0].mode)
19741 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19742 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19743 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19744 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19745 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19746 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19747 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19748 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19749 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19750 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19751 default: gcc_unreachable ();
19757 gcc_unreachable ();
19760 gcc_assert (ftype != NULL);
19762 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19764 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19766 arm_builtin_decls[fcode] = decl;
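/* E.g. the first vadd entry in neon_builtin_data becomes
   "__builtin_neon_vaddv8qi".  (Illustration only.)  */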
19770 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19773 if ((MASK) & insn_flags) \
19776 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19777 BUILT_IN_MD, NULL, NULL_TREE); \
19778 arm_builtin_decls[CODE] = bdecl; \
19783 struct builtin_description
19785 const unsigned int mask;
19786 const enum insn_code icode;
19787 const char * const name;
19788 const enum arm_builtins code;
19789 const enum rtx_code comparison;
19790 const unsigned int flag;
19793 static const struct builtin_description bdesc_2arg[] =
19795 #define IWMMXT_BUILTIN(code, string, builtin) \
19796 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19797 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
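/* Expansion sketch (illustration): IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   produces

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   so the builtin is registered only when FL_IWMMXT is set in insn_flags.  */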
19799 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19800 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19801 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19802 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19803 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19804 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19805 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19806 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19807 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19808 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19809 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19810 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19811 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19812 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19813 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19814 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19815 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19816 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19817 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19818 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19819 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19820 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19821 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19822 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19823 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19824 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19825 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19826 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19827 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19828 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19829 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19830 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19831 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19832 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19833 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19834 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19835 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19836 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19837 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19838 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19839 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19840 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19841 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19842 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19843 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19844 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19845 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19846 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19847 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19848 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19849 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19850 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19851 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19852 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19853 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19854 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19855 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19856 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19858 #define IWMMXT_BUILTIN2(code, builtin) \
19859 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
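/* These entries carry a NULL name: they only map a builtin code to its
   insn pattern.  The user-visible "__builtin_arm_..." function is
   registered separately through the iwmmx_mbuiltin calls below; e.g.
   WPACKHSS appears again there under the name "wpackhss".  */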
19861 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19862 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19863 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19864 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19865 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19866 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19867 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
19868 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
19869 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
19870 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
19871 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
19872 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
19873 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
19874 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
19875 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
19876 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
19877 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
19878 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
19879 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
19880 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
19881 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
19882 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
19883 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
19884 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
19885 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
19886 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
19887 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
19888 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
19889 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
19890 IWMMXT_BUILTIN2 (rordi3, WRORDI)
19891 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19892 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19895 static const struct builtin_description bdesc_1arg[] =
19897 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19898 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19899 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19900 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19901 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19902 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19903 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19904 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19905 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19906 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19907 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19908 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19909 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19910 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19911 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19912 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19913 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19914 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19917 /* Set up all the iWMMXt builtins. This is not called if
19918 TARGET_IWMMXT is zero. */
19921 arm_init_iwmmxt_builtins (void)
19923 const struct builtin_description * d;
19926 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19927 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19928 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19931 = build_function_type_list (integer_type_node,
19932 integer_type_node, NULL_TREE);
19933 tree v8qi_ftype_v8qi_v8qi_int
19934 = build_function_type_list (V8QI_type_node,
19935 V8QI_type_node, V8QI_type_node,
19936 integer_type_node, NULL_TREE);
19937 tree v4hi_ftype_v4hi_int
19938 = build_function_type_list (V4HI_type_node,
19939 V4HI_type_node, integer_type_node, NULL_TREE);
19940 tree v2si_ftype_v2si_int
19941 = build_function_type_list (V2SI_type_node,
19942 V2SI_type_node, integer_type_node, NULL_TREE);
19943 tree v2si_ftype_di_di
19944 = build_function_type_list (V2SI_type_node,
19945 long_long_integer_type_node,
19946 long_long_integer_type_node,
19948 tree di_ftype_di_int
19949 = build_function_type_list (long_long_integer_type_node,
19950 long_long_integer_type_node,
19951 integer_type_node, NULL_TREE);
19952 tree di_ftype_di_int_int
19953 = build_function_type_list (long_long_integer_type_node,
19954 long_long_integer_type_node,
19956 integer_type_node, NULL_TREE);
19957 tree int_ftype_v8qi
19958 = build_function_type_list (integer_type_node,
19959 V8QI_type_node, NULL_TREE);
19960 tree int_ftype_v4hi
19961 = build_function_type_list (integer_type_node,
19962 V4HI_type_node, NULL_TREE);
19963 tree int_ftype_v2si
19964 = build_function_type_list (integer_type_node,
19965 V2SI_type_node, NULL_TREE);
19966 tree int_ftype_v8qi_int
19967 = build_function_type_list (integer_type_node,
19968 V8QI_type_node, integer_type_node, NULL_TREE);
19969 tree int_ftype_v4hi_int
19970 = build_function_type_list (integer_type_node,
19971 V4HI_type_node, integer_type_node, NULL_TREE);
19972 tree int_ftype_v2si_int
19973 = build_function_type_list (integer_type_node,
19974 V2SI_type_node, integer_type_node, NULL_TREE);
19975 tree v8qi_ftype_v8qi_int_int
19976 = build_function_type_list (V8QI_type_node,
19977 V8QI_type_node, integer_type_node,
19978 integer_type_node, NULL_TREE);
19979 tree v4hi_ftype_v4hi_int_int
19980 = build_function_type_list (V4HI_type_node,
19981 V4HI_type_node, integer_type_node,
19982 integer_type_node, NULL_TREE);
19983 tree v2si_ftype_v2si_int_int
19984 = build_function_type_list (V2SI_type_node,
19985 V2SI_type_node, integer_type_node,
19986 integer_type_node, NULL_TREE);
19987 /* Miscellaneous. */
19988 tree v8qi_ftype_v4hi_v4hi
19989 = build_function_type_list (V8QI_type_node,
19990 V4HI_type_node, V4HI_type_node, NULL_TREE);
19991 tree v4hi_ftype_v2si_v2si
19992 = build_function_type_list (V4HI_type_node,
19993 V2SI_type_node, V2SI_type_node, NULL_TREE);
19994 tree v2si_ftype_v4hi_v4hi
19995 = build_function_type_list (V2SI_type_node,
19996 V4HI_type_node, V4HI_type_node, NULL_TREE);
19997 tree v2si_ftype_v8qi_v8qi
19998 = build_function_type_list (V2SI_type_node,
19999 V8QI_type_node, V8QI_type_node, NULL_TREE);
20000 tree v4hi_ftype_v4hi_di
20001 = build_function_type_list (V4HI_type_node,
20002 V4HI_type_node, long_long_integer_type_node,
20004 tree v2si_ftype_v2si_di
20005 = build_function_type_list (V2SI_type_node,
20006 V2SI_type_node, long_long_integer_type_node,
20008 tree void_ftype_int_int
20009 = build_function_type_list (void_type_node,
20010 integer_type_node, integer_type_node,
20013 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20015 = build_function_type_list (long_long_integer_type_node,
20016 V8QI_type_node, NULL_TREE);
20018 = build_function_type_list (long_long_integer_type_node,
20019 V4HI_type_node, NULL_TREE);
20021 = build_function_type_list (long_long_integer_type_node,
20022 V2SI_type_node, NULL_TREE);
20023 tree v2si_ftype_v4hi
20024 = build_function_type_list (V2SI_type_node,
20025 V4HI_type_node, NULL_TREE);
20026 tree v4hi_ftype_v8qi
20027 = build_function_type_list (V4HI_type_node,
20028 V8QI_type_node, NULL_TREE);
20030 tree di_ftype_di_v4hi_v4hi
20031 = build_function_type_list (long_long_unsigned_type_node,
20032 long_long_unsigned_type_node,
20033 V4HI_type_node, V4HI_type_node,
20036 tree di_ftype_v4hi_v4hi
20037 = build_function_type_list (long_long_unsigned_type_node,
V4HI_type_node, V4HI_type_node,
20041 /* Normal vector binops. */
20042 tree v8qi_ftype_v8qi_v8qi
20043 = build_function_type_list (V8QI_type_node,
20044 V8QI_type_node, V8QI_type_node, NULL_TREE);
20045 tree v4hi_ftype_v4hi_v4hi
20046 = build_function_type_list (V4HI_type_node,
V4HI_type_node, V4HI_type_node, NULL_TREE);
20048 tree v2si_ftype_v2si_v2si
20049 = build_function_type_list (V2SI_type_node,
20050 V2SI_type_node, V2SI_type_node, NULL_TREE);
20051 tree di_ftype_di_di
20052 = build_function_type_list (long_long_unsigned_type_node,
20053 long_long_unsigned_type_node,
20054 long_long_unsigned_type_node,
20057 /* Add all builtins that are more or less simple operations on two
20059 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20061 /* Use one of the operands; the target can have a different mode for
20062 mask-generating compares. */
20063 enum machine_mode mode;
20069 mode = insn_data[d->icode].operand[1].mode;
20074 type = v8qi_ftype_v8qi_v8qi;
20077 type = v4hi_ftype_v4hi_v4hi;
20080 type = v2si_ftype_v2si_v2si;
20083 type = di_ftype_di_di;
20087 gcc_unreachable ();
20090 def_mbuiltin (d->mask, d->name, type, d->code);
20093 /* Add the remaining MMX insns with somewhat more complicated types. */
20094 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20095 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20096 ARM_BUILTIN_ ## CODE)
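/* For example, iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) just
   below expands to
   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		 ARM_BUILTIN_WZERO).  */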
20098 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20099 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20100 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20102 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20103 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20104 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20105 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20106 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20107 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20109 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20110 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20111 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20112 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20113 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20114 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20116 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20117 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20118 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20119 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20120 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20121 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20123 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20124 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20125 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20126 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20127 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20128 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20130 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20132 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20133 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20134 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20135 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20137 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20138 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20139 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20140 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20141 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20142 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20143 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20144 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20145 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20147 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20148 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20149 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20151 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20152 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20153 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20155 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20156 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20157 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20158 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20159 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20160 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20162 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20163 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20164 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20165 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20166 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20167 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20168 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20169 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20170 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20171 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20172 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20173 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20175 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20176 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20177 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20178 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20180 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20181 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20182 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20183 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20184 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20185 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20186 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20188 #undef iwmmx_mbuiltin
20192 arm_init_tls_builtins (void)
20196 ftype = build_function_type (ptr_type_node, void_list_node);
20197 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20198 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20200 TREE_NOTHROW (decl) = 1;
20201 TREE_READONLY (decl) = 1;
20202 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
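/* A user-level sketch of the builtin registered above:

     void *tp = __builtin_thread_pointer ();

   arm_expand_builtin lowers this through arm_load_tp; TREE_NOTHROW and
   TREE_READONLY let the optimizers treat repeated reads as CSEable.  */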
20206 arm_init_fp16_builtins (void)
20208 tree fp16_type = make_node (REAL_TYPE);
20209 TYPE_PRECISION (fp16_type) = 16;
20210 layout_type (fp16_type);
20211 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
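/* Once registered, __fp16 is usable as a storage-only type in C:

     __fp16 h = 1.0;

   This is only reached when an fp16 format is selected; see the
   arm_fp16_format check in arm_init_builtins below.  */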
20215 arm_init_builtins (void)
20217 arm_init_tls_builtins ();
20219 if (TARGET_REALLY_IWMMXT)
20220 arm_init_iwmmxt_builtins ();
20223 arm_init_neon_builtins ();
20225 if (arm_fp16_format)
20226 arm_init_fp16_builtins ();
20229 /* Return the ARM builtin for CODE. */
20232 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20234 if (code >= ARM_BUILTIN_MAX)
20235 return error_mark_node;
20237 return arm_builtin_decls[code];
20240 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20242 static const char *
20243 arm_invalid_parameter_type (const_tree t)
20245 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20246 return N_("function parameters cannot have __fp16 type");
20250 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20252 static const char *
20253 arm_invalid_return_type (const_tree t)
20255 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20256 return N_("functions cannot return __fp16 type");
20260 /* Implement TARGET_PROMOTED_TYPE. */
20263 arm_promoted_type (const_tree t)
20265 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20266 return float_type_node;
20270 /* Implement TARGET_CONVERT_TO_TYPE.
20271 Specifically, this hook implements the peculiarity of the ARM
20272 half-precision floating-point C semantics that requires conversions between
20273 __fp16 to or from double to do an intermediate conversion to float. */
20276 arm_convert_to_type (tree type, tree expr)
20278 tree fromtype = TREE_TYPE (expr);
20279 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20281 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20282 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20283 return convert (type, convert (float_type_node, expr));
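/* Hence a conversion such as

     __fp16 h;
     double d = (double) h;

   is emitted as (double)(float) h, and likewise in the opposite
   direction, matching the __fp16 semantics described above.  */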
20287 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20288 This simply adds HFmode as a supported mode; even though we don't
20289 implement arithmetic on this type directly, it's supported by
20290 optabs conversions, much the way the double-word arithmetic is
20291 special-cased in the default hook. */
20294 arm_scalar_mode_supported_p (enum machine_mode mode)
20296 if (mode == HFmode)
20297 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20298 else if (ALL_FIXED_POINT_MODE_P (mode))
20301 return default_scalar_mode_supported_p (mode);
20304 /* Errors in the source file can cause expand_expr to return const0_rtx
20305 where we expect a vector. To avoid crashing, use one of the vector
20306 clear instructions. */
20309 safe_vector_operand (rtx x, enum machine_mode mode)
20311 if (x != const0_rtx)
20313 x = gen_reg_rtx (mode);
20315 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20316 : gen_rtx_SUBREG (DImode, x, 0)));
20320 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20323 arm_expand_binop_builtin (enum insn_code icode,
20324 tree exp, rtx target)
20327 tree arg0 = CALL_EXPR_ARG (exp, 0);
20328 tree arg1 = CALL_EXPR_ARG (exp, 1);
20329 rtx op0 = expand_normal (arg0);
20330 rtx op1 = expand_normal (arg1);
20331 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20332 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20333 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20335 if (VECTOR_MODE_P (mode0))
20336 op0 = safe_vector_operand (op0, mode0);
20337 if (VECTOR_MODE_P (mode1))
20338 op1 = safe_vector_operand (op1, mode1);
20341 || GET_MODE (target) != tmode
20342 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20343 target = gen_reg_rtx (tmode);
20345 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20348 op0 = copy_to_mode_reg (mode0, op0);
20349 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20350 op1 = copy_to_mode_reg (mode1, op1);
20352 pat = GEN_FCN (icode) (target, op0, op1);
20359 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20362 arm_expand_unop_builtin (enum insn_code icode,
20363 tree exp, rtx target, int do_load)
20366 tree arg0 = CALL_EXPR_ARG (exp, 0);
20367 rtx op0 = expand_normal (arg0);
20368 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20369 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20372 || GET_MODE (target) != tmode
20373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20374 target = gen_reg_rtx (tmode);
20376 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20379 if (VECTOR_MODE_P (mode0))
20380 op0 = safe_vector_operand (op0, mode0);
20382 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20383 op0 = copy_to_mode_reg (mode0, op0);
20386 pat = GEN_FCN (icode) (target, op0);
20394 NEON_ARG_COPY_TO_REG,
20400 #define NEON_MAX_BUILTIN_ARGS 5
20402 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20403 and return an expression for the accessed memory.
20405 The intrinsic function operates on a block of registers that has
20406 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20407 The function references the memory at EXP in mode MEM_MODE;
20408 this mode may be BLKmode if no more suitable mode is available. */
20411 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20412 enum machine_mode reg_mode,
20413 neon_builtin_type_mode type_mode)
20415 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20416 tree elem_type, upper_bound, array_type;
20418 /* Work out the size of the register block in bytes. */
20419 reg_size = GET_MODE_SIZE (reg_mode);
20421 /* Work out the size of each vector in bytes. */
20422 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20423 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20425 /* Work out how many vectors there are. */
20426 gcc_assert (reg_size % vector_size == 0);
20427 nvectors = reg_size / vector_size;
20429 /* Work out how many elements are being loaded or stored.
20430 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20431 and memory elements; anything else implies a lane load or store. */
20432 if (mem_mode == reg_mode)
20433 nelems = vector_size * nvectors;
20437 /* Work out the type of each element. */
20438 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20439 elem_type = TREE_TYPE (TREE_TYPE (exp));
20441 /* Create a type that describes the full access. */
20442 upper_bound = build_int_cst (size_type_node, nelems - 1);
20443 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20445 /* Dereference EXP using that type. */
20446 exp = convert (build_pointer_type (array_type), exp);
20447 return fold_build2 (MEM_REF, array_type, exp,
20448 build_int_cst (TREE_TYPE (exp), 0));
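/* In effect the access built above has the shape

     *(elem_type (*)[nelems]) exp

   so the alias machinery sees exactly how many elements a
   multi-register load or store may touch.  */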
20451 /* Expand a Neon builtin. */
20453 arm_expand_neon_args (rtx target, int icode, int have_retval,
20454 neon_builtin_type_mode type_mode,
20459 tree arg[NEON_MAX_BUILTIN_ARGS];
20460 rtx op[NEON_MAX_BUILTIN_ARGS];
20461 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20462 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20463 enum machine_mode other_mode;
20469 || GET_MODE (target) != tmode
20470 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20471 target = gen_reg_rtx (tmode);
20473 va_start (ap, exp);
20477 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20479 if (thisarg == NEON_ARG_STOP)
20483 opno = argc + have_retval;
20484 mode[argc] = insn_data[icode].operand[opno].mode;
20485 arg[argc] = CALL_EXPR_ARG (exp, argc);
20486 if (thisarg == NEON_ARG_MEMORY)
20488 other_mode = insn_data[icode].operand[1 - opno].mode;
20489 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20490 other_mode, type_mode);
20492 op[argc] = expand_normal (arg[argc]);
20496 case NEON_ARG_COPY_TO_REG:
20497 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20498 if (!(*insn_data[icode].operand[opno].predicate)
20499 (op[argc], mode[argc]))
20500 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20503 case NEON_ARG_CONSTANT:
20504 /* FIXME: This error message is somewhat unhelpful. */
20505 if (!(*insn_data[icode].operand[opno].predicate)
20506 (op[argc], mode[argc]))
20507 error ("argument must be a constant");
20510 case NEON_ARG_MEMORY:
20511 gcc_assert (MEM_P (op[argc]));
20512 PUT_MODE (op[argc], mode[argc]);
20513 /* ??? arm_neon.h uses the same built-in functions for signed
20514 and unsigned accesses, casting where necessary. This isn't
20516 set_mem_alias_set (op[argc], 0);
20517 if (!(*insn_data[icode].operand[opno].predicate)
20518 (op[argc], mode[argc]))
20519 op[argc] = (replace_equiv_address
20520 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20523 case NEON_ARG_STOP:
20524 gcc_unreachable ();
20537 pat = GEN_FCN (icode) (target, op[0]);
20541 pat = GEN_FCN (icode) (target, op[0], op[1]);
20545 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20549 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20553 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20557 gcc_unreachable ();
20563 pat = GEN_FCN (icode) (op[0]);
20567 pat = GEN_FCN (icode) (op[0], op[1]);
20571 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20575 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20579 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20583 gcc_unreachable ();
20594 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20595 constants defined per-instruction or per instruction-variant. Instead, the
20596 required info is looked up in the table neon_builtin_data. */
20598 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20600 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20601 neon_itype itype = d->itype;
20602 enum insn_code icode = d->code;
20603 neon_builtin_type_mode type_mode = d->mode;
20610 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20611 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20615 case NEON_SCALARMUL:
20616 case NEON_SCALARMULL:
20617 case NEON_SCALARMULH:
20618 case NEON_SHIFTINSERT:
20619 case NEON_LOGICBINOP:
20620 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20621 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20625 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20626 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20627 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20631 case NEON_SHIFTIMM:
20632 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20633 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20637 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20638 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20642 case NEON_REINTERP:
20643 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20644 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20648 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20649 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20651 case NEON_RESULTPAIR:
20652 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20653 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20657 case NEON_LANEMULL:
20658 case NEON_LANEMULH:
20659 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20660 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20661 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20664 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20665 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20666 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20668 case NEON_SHIFTACC:
20669 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20670 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20671 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20673 case NEON_SCALARMAC:
20674 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20675 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20676 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20680 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20681 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20685 case NEON_LOADSTRUCT:
20686 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20687 NEON_ARG_MEMORY, NEON_ARG_STOP);
20689 case NEON_LOAD1LANE:
20690 case NEON_LOADSTRUCTLANE:
20691 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20692 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20696 case NEON_STORESTRUCT:
20697 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20698 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20700 case NEON_STORE1LANE:
20701 case NEON_STORESTRUCTLANE:
20702 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20703 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20707 gcc_unreachable ();
20710 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20712 neon_reinterpret (rtx dest, rtx src)
20714 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
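/* This backs the vreinterpret-style intrinsics (a sketch; the
   user-level mapping lives in arm_neon.h): the destination reuses the
   source bits unchanged in the new mode, so at most a register move is
   emitted.  */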
20717 /* Emit code to place a Neon pair result in memory locations (with equal
20720 neon_emit_pair_result_insn (enum machine_mode mode,
20721 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20724 rtx mem = gen_rtx_MEM (mode, destaddr);
20725 rtx tmp1 = gen_reg_rtx (mode);
20726 rtx tmp2 = gen_reg_rtx (mode);
20728 emit_insn (intfn (tmp1, op1, op2, tmp2));
20730 emit_move_insn (mem, tmp1);
20731 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20732 emit_move_insn (mem, tmp2);
20735 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20736 not to early-clobber SRC registers in the process.
20738 We assume that the operands described by SRC and DEST represent a
20739 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20740 number of components into which the copy has been decomposed. */
20742 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20746 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20747 || REGNO (operands[0]) < REGNO (operands[1]))
20749 for (i = 0; i < count; i++)
20751 operands[2 * i] = dest[i];
20752 operands[2 * i + 1] = src[i];
20757 for (i = 0; i < count; i++)
20759 operands[2 * i] = dest[count - i - 1];
20760 operands[2 * i + 1] = src[count - i - 1];
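/* Example: for a two-component copy with dest = {d2, d3} and
   src = {d1, d2}, the forward order would emit d2 := d1 followed by
   d3 := d2, reading an already-clobbered d2.  Since the ranges overlap
   and REGNO (operands[0]) > REGNO (operands[1]), the loop above
   reverses the components: d3 := d2 first, then d2 := d1.  */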
20765 /* Expand an expression EXP that calls a built-in function,
20766 with result going to TARGET if that's convenient
20767 (and in mode MODE if that's convenient).
20768 SUBTARGET may be used as the target for computing one of EXP's operands.
20769 IGNORE is nonzero if the value is to be ignored. */
20772 arm_expand_builtin (tree exp,
20774 rtx subtarget ATTRIBUTE_UNUSED,
20775 enum machine_mode mode ATTRIBUTE_UNUSED,
20776 int ignore ATTRIBUTE_UNUSED)
20778 const struct builtin_description * d;
20779 enum insn_code icode;
20780 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20788 int fcode = DECL_FUNCTION_CODE (fndecl);
20790 enum machine_mode tmode;
20791 enum machine_mode mode0;
20792 enum machine_mode mode1;
20793 enum machine_mode mode2;
20795 if (fcode >= ARM_BUILTIN_NEON_BASE)
20796 return arm_expand_neon_builtin (fcode, exp, target);
20800 case ARM_BUILTIN_TEXTRMSB:
20801 case ARM_BUILTIN_TEXTRMUB:
20802 case ARM_BUILTIN_TEXTRMSH:
20803 case ARM_BUILTIN_TEXTRMUH:
20804 case ARM_BUILTIN_TEXTRMSW:
20805 case ARM_BUILTIN_TEXTRMUW:
20806 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20807 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20808 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20809 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20810 : CODE_FOR_iwmmxt_textrmw);
20812 arg0 = CALL_EXPR_ARG (exp, 0);
20813 arg1 = CALL_EXPR_ARG (exp, 1);
20814 op0 = expand_normal (arg0);
20815 op1 = expand_normal (arg1);
20816 tmode = insn_data[icode].operand[0].mode;
20817 mode0 = insn_data[icode].operand[1].mode;
20818 mode1 = insn_data[icode].operand[2].mode;
20820 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20821 op0 = copy_to_mode_reg (mode0, op0);
20822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20824 /* @@@ better error message */
20825 error ("selector must be an immediate");
20826 return gen_reg_rtx (tmode);
20829 || GET_MODE (target) != tmode
20830 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20831 target = gen_reg_rtx (tmode);
20832 pat = GEN_FCN (icode) (target, op0, op1);
20838 case ARM_BUILTIN_TINSRB:
20839 case ARM_BUILTIN_TINSRH:
20840 case ARM_BUILTIN_TINSRW:
20841 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20842 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20843 : CODE_FOR_iwmmxt_tinsrw);
20844 arg0 = CALL_EXPR_ARG (exp, 0);
20845 arg1 = CALL_EXPR_ARG (exp, 1);
20846 arg2 = CALL_EXPR_ARG (exp, 2);
20847 op0 = expand_normal (arg0);
20848 op1 = expand_normal (arg1);
20849 op2 = expand_normal (arg2);
20850 tmode = insn_data[icode].operand[0].mode;
20851 mode0 = insn_data[icode].operand[1].mode;
20852 mode1 = insn_data[icode].operand[2].mode;
20853 mode2 = insn_data[icode].operand[3].mode;
20855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20856 op0 = copy_to_mode_reg (mode0, op0);
20857 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20858 op1 = copy_to_mode_reg (mode1, op1);
20859 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20861 /* @@@ better error message */
20862 error ("selector must be an immediate");
20866 || GET_MODE (target) != tmode
20867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20868 target = gen_reg_rtx (tmode);
20869 pat = GEN_FCN (icode) (target, op0, op1, op2);
20875 case ARM_BUILTIN_SETWCX:
20876 arg0 = CALL_EXPR_ARG (exp, 0);
20877 arg1 = CALL_EXPR_ARG (exp, 1);
20878 op0 = force_reg (SImode, expand_normal (arg0));
20879 op1 = expand_normal (arg1);
20880 emit_insn (gen_iwmmxt_tmcr (op1, op0));
20883 case ARM_BUILTIN_GETWCX:
20884 arg0 = CALL_EXPR_ARG (exp, 0);
20885 op0 = expand_normal (arg0);
20886 target = gen_reg_rtx (SImode);
20887 emit_insn (gen_iwmmxt_tmrc (target, op0));
20890 case ARM_BUILTIN_WSHUFH:
20891 icode = CODE_FOR_iwmmxt_wshufh;
20892 arg0 = CALL_EXPR_ARG (exp, 0);
20893 arg1 = CALL_EXPR_ARG (exp, 1);
20894 op0 = expand_normal (arg0);
20895 op1 = expand_normal (arg1);
20896 tmode = insn_data[icode].operand[0].mode;
20897 mode1 = insn_data[icode].operand[1].mode;
20898 mode2 = insn_data[icode].operand[2].mode;
20900 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20901 op0 = copy_to_mode_reg (mode1, op0);
20902 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20904 /* @@@ better error message */
20905 error ("mask must be an immediate");
20909 || GET_MODE (target) != tmode
20910 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20911 target = gen_reg_rtx (tmode);
20912 pat = GEN_FCN (icode) (target, op0, op1);
20918 case ARM_BUILTIN_WSADB:
20919 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20920 case ARM_BUILTIN_WSADH:
20921 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20922 case ARM_BUILTIN_WSADBZ:
20923 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20924 case ARM_BUILTIN_WSADHZ:
20925 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20927 /* Several three-argument builtins. */
20928 case ARM_BUILTIN_WMACS:
20929 case ARM_BUILTIN_WMACU:
20930 case ARM_BUILTIN_WALIGN:
20931 case ARM_BUILTIN_TMIA:
20932 case ARM_BUILTIN_TMIAPH:
20933 case ARM_BUILTIN_TMIATT:
20934 case ARM_BUILTIN_TMIATB:
20935 case ARM_BUILTIN_TMIABT:
20936 case ARM_BUILTIN_TMIABB:
20937 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20938 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20939 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20940 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20941 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20942 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20943 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20944 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20945 : CODE_FOR_iwmmxt_walign);
20946 arg0 = CALL_EXPR_ARG (exp, 0);
20947 arg1 = CALL_EXPR_ARG (exp, 1);
20948 arg2 = CALL_EXPR_ARG (exp, 2);
20949 op0 = expand_normal (arg0);
20950 op1 = expand_normal (arg1);
20951 op2 = expand_normal (arg2);
20952 tmode = insn_data[icode].operand[0].mode;
20953 mode0 = insn_data[icode].operand[1].mode;
20954 mode1 = insn_data[icode].operand[2].mode;
20955 mode2 = insn_data[icode].operand[3].mode;
20957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20958 op0 = copy_to_mode_reg (mode0, op0);
20959 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20960 op1 = copy_to_mode_reg (mode1, op1);
20961 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20962 op2 = copy_to_mode_reg (mode2, op2);
20964 || GET_MODE (target) != tmode
20965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20966 target = gen_reg_rtx (tmode);
20967 pat = GEN_FCN (icode) (target, op0, op1, op2);
20973 case ARM_BUILTIN_WZERO:
20974 target = gen_reg_rtx (DImode);
20975 emit_insn (gen_iwmmxt_clrdi (target));
20978 case ARM_BUILTIN_THREAD_POINTER:
20979 return arm_load_tp (target);
20985 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20986 if (d->code == (const enum arm_builtins) fcode)
20987 return arm_expand_binop_builtin (d->icode, exp, target);
20989 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20990 if (d->code == (const enum arm_builtins) fcode)
20991 return arm_expand_unop_builtin (d->icode, exp, target, 0);
20993 /* @@@ Should really do something sensible here. */
20997 /* Return the number (counting from 0) of
20998 the least significant set bit in MASK. */
21001 number_of_first_bit_set (unsigned mask)
21003 return ctz_hwi (mask);
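/* E.g. number_of_first_bit_set (0x18) is 3, since bit 3 is the lowest
   bit set in 0b11000.  */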
21006 /* Like emit_multi_reg_push, but allowing for a different set of
21007 registers to be described as saved. MASK is the set of registers
21008 to be saved; REAL_REGS is the set of registers to be described as
21009 saved. If REAL_REGS is 0, only describe the stack adjustment. */
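/* For example (an illustrative sketch), MASK = (1 << 4) | (1 << 7)
   produces a "push {r4, r7}", while REAL_REGS determines which
   registers the attached REG_FRAME_RELATED_EXPR note claims were
   stored in those stack slots.  */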
21012 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21014 unsigned long regno;
21015 rtx par[10], tmp, reg, insn;
21018 /* Build the parallel of the registers actually being stored. */
21019 for (i = 0; mask; ++i, mask &= mask - 1)
21021 regno = ctz_hwi (mask);
21022 reg = gen_rtx_REG (SImode, regno);
21025 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21027 tmp = gen_rtx_USE (VOIDmode, reg);
21032 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21033 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21034 tmp = gen_frame_mem (BLKmode, tmp);
21035 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21038 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21039 insn = emit_insn (tmp);
21041 /* Always build the stack adjustment note for unwind info. */
21042 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21043 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21046 /* Build the parallel of the registers recorded as saved for unwind. */
21047 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21049 regno = ctz_hwi (real_regs);
21050 reg = gen_rtx_REG (SImode, regno);
21052 tmp = plus_constant (stack_pointer_rtx, j * 4);
21053 tmp = gen_frame_mem (SImode, tmp);
21054 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21055 RTX_FRAME_RELATED_P (tmp) = 1;
21063 RTX_FRAME_RELATED_P (par[0]) = 1;
21064 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21072 /* Emit code to push or pop registers to or from the stack. F is the
21073 assembly file. MASK is the registers to pop. */
21075 thumb_pop (FILE *f, unsigned long mask)
21078 int lo_mask = mask & 0xFF;
21079 int pushed_words = 0;
21083 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
21091 fprintf (f, "\tpop\t{");
21093 /* Look at the low registers first. */
21094 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21098 asm_fprintf (f, "%r", regno);
21100 if ((lo_mask & ~1) != 0)
21107 if (mask & (1 << PC_REGNUM))
21109 /* Catch popping the PC. */
21110 if (TARGET_INTERWORK || TARGET_BACKTRACE
21111 || crtl->calls_eh_return)
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
21115 fprintf (f, "}\n");
21117 thumb_exit (f, -1);
21126 asm_fprintf (f, "%r", PC_REGNUM);
21130 fprintf (f, "}\n");
21133 /* Generate code to return from a thumb function.
21134 If 'reg_containing_return_addr' is -1, then the return address is
21135 actually on the stack, at the stack pointer. */
21137 thumb_exit (FILE *f, int reg_containing_return_addr)
21139 unsigned regs_available_for_popping;
21140 unsigned regs_to_pop;
21142 unsigned available;
21146 int restore_a4 = FALSE;
21148 /* Compute the registers we need to pop. */
21152 if (reg_containing_return_addr == -1)
21154 regs_to_pop |= 1 << LR_REGNUM;
21158 if (TARGET_BACKTRACE)
21160 /* Restore the (ARM) frame pointer and stack pointer. */
21161 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21165 /* If there is nothing to pop then just emit the BX instruction and
21167 if (pops_needed == 0)
21169 if (crtl->calls_eh_return)
21170 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21172 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21175 /* Otherwise if we are not supporting interworking and we have not created
21176 a backtrace structure and the function was not entered in ARM mode then
21177 just pop the return address straight into the PC. */
21178 else if (!TARGET_INTERWORK
21179 && !TARGET_BACKTRACE
21180 && !is_called_in_ARM_mode (current_function_decl)
21181 && !crtl->calls_eh_return)
21183 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21187 /* Find out how many of the (return) argument registers we can corrupt. */
21188 regs_available_for_popping = 0;
21190 /* If returning via __builtin_eh_return, the bottom three registers
21191 all contain information needed for the return. */
21192 if (crtl->calls_eh_return)
      /* Otherwise, deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
21202 if (crtl->return_rtx != 0)
21203 mode = GET_MODE (crtl->return_rtx);
21205 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21207 size = GET_MODE_SIZE (mode);
21211 /* In a void function we can use any argument register.
21212 In a function that returns a structure on the stack
21213 we can use the second and third argument registers. */
21214 if (mode == VOIDmode)
21215 regs_available_for_popping =
21216 (1 << ARG_REGISTER (1))
21217 | (1 << ARG_REGISTER (2))
21218 | (1 << ARG_REGISTER (3));
21220 regs_available_for_popping =
21221 (1 << ARG_REGISTER (2))
21222 | (1 << ARG_REGISTER (3));
21224 else if (size <= 4)
21225 regs_available_for_popping =
21226 (1 << ARG_REGISTER (2))
21227 | (1 << ARG_REGISTER (3));
21228 else if (size <= 8)
21229 regs_available_for_popping =
21230 (1 << ARG_REGISTER (3));
21233 /* Match registers to be popped with registers into which we pop them. */
21234 for (available = regs_available_for_popping,
21235 required = regs_to_pop;
21236 required != 0 && available != 0;
21237 available &= ~(available & - available),
21238 required &= ~(required & - required))
21241 /* If we have any popping registers left over, remove them. */
21243 regs_available_for_popping &= ~available;
21245 /* Otherwise if we need another popping register we can use
21246 the fourth argument register. */
21247 else if (pops_needed)
21249 /* If we have not found any free argument registers and
21250 reg a4 contains the return address, we must move it. */
21251 if (regs_available_for_popping == 0
21252 && reg_containing_return_addr == LAST_ARG_REGNUM)
21254 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21255 reg_containing_return_addr = LR_REGNUM;
21257 else if (size > 12)
21259 /* Register a4 is being used to hold part of the return value,
21260 but we have dire need of a free, low register. */
	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21266 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21268 /* The fourth argument register is available. */
21269 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21275 /* Pop as many registers as we can. */
21276 thumb_pop (f, regs_available_for_popping);
21278 /* Process the registers we popped. */
21279 if (reg_containing_return_addr == -1)
21281 /* The return address was popped into the lowest numbered register. */
21282 regs_to_pop &= ~(1 << LR_REGNUM);
21284 reg_containing_return_addr =
21285 number_of_first_bit_set (regs_available_for_popping);
      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
21289 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21292 /* If we popped other registers then handle them here. */
21293 if (regs_available_for_popping)
21297 /* Work out which register currently contains the frame pointer. */
21298 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21300 /* Move it into the correct place. */
21301 asm_fprintf (f, "\tmov\t%r, %r\n",
21302 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21304 /* (Temporarily) remove it from the mask of popped registers. */
21305 regs_available_for_popping &= ~(1 << frame_pointer);
21306 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21308 if (regs_available_for_popping)
	  /* We popped the stack pointer as well;
	     find the register that contains it.  */
21314 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21316 /* Move it into the stack register. */
21317 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21319 /* At this point we have popped all necessary registers, so
21320 do not worry about restoring regs_available_for_popping
21321 to its correct value:
21323 assert (pops_needed == 0)
21324 assert (regs_available_for_popping == (1 << frame_pointer))
21325 assert (regs_to_pop == (1 << STACK_POINTER)) */
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
21332 regs_available_for_popping |= (1 << frame_pointer);
21336 /* If we still have registers left on the stack, but we no longer have
21337 any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     we used to hold it.  */
21340 if (regs_available_for_popping == 0 && pops_needed > 0)
21342 regs_available_for_popping |= 1 << reg_containing_return_addr;
21344 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21345 reg_containing_return_addr);
21347 reg_containing_return_addr = LR_REGNUM;
21350 /* If we have registers left on the stack then pop some more.
21351 We know that at most we will want to pop FP and SP. */
21352 if (pops_needed > 0)
21357 thumb_pop (f, regs_available_for_popping);
21359 /* We have popped either FP or SP.
21360 Move whichever one it is into the correct register. */
21361 popped_into = number_of_first_bit_set (regs_available_for_popping);
21362 move_to = number_of_first_bit_set (regs_to_pop);
21364 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21366 regs_to_pop &= ~(1 << move_to);
21371 /* If we still have not popped everything then we must have only
21372 had one register available to us and we are now popping the SP. */
21373 if (pops_needed > 0)
21377 thumb_pop (f, regs_available_for_popping);
21379 popped_into = number_of_first_bit_set (regs_available_for_popping);
21381 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21383 assert (regs_to_pop == (1 << STACK_POINTER))
21384 assert (pops_needed == 1)
21388 /* If necessary restore the a4 register. */
21391 if (reg_containing_return_addr != LR_REGNUM)
21393 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21394 reg_containing_return_addr = LR_REGNUM;
21397 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21400 if (crtl->calls_eh_return)
21401 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21403 /* Return to caller. */
21404 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21407 /* Scan INSN just before assembler is output for it.
21408 For Thumb-1, we track the status of the condition codes; this
21409 information is used in the cbranchsi4_insn pattern. */
21411 thumb1_final_prescan_insn (rtx insn)
21413 if (flag_print_asm_name)
21414 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21415 INSN_ADDRESSES (INSN_UID (insn)));
21416 /* Don't overwrite the previous setter when we get to a cbranch. */
21417 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21419 enum attr_conds conds;
21421 if (cfun->machine->thumb1_cc_insn)
21423 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21424 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21427 conds = get_attr_conds (insn);
21428 if (conds == CONDS_SET)
21430 rtx set = single_set (insn);
21431 cfun->machine->thumb1_cc_insn = insn;
21432 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21433 cfun->machine->thumb1_cc_op1 = const0_rtx;
21434 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21435 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21437 rtx src1 = XEXP (SET_SRC (set), 1);
21438 if (src1 == const0_rtx)
21439 cfun->machine->thumb1_cc_mode = CCmode;
21442 else if (conds != CONDS_NOCOND)
21443 cfun->machine->thumb1_cc_insn = NULL_RTX;
21448 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21450 unsigned HOST_WIDE_INT mask = 0xff;
21453 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;
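/* E.g. 0x00ff0000 is shiftable (0xff << 16), whereas 0x00ff00ff is
   not: no single 8-bit field shifted left can cover both bytes.  */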
21464 /* Returns nonzero if the current function contains,
21465 or might contain a far jump. */
21467 thumb_far_jump_used_p (void)
21471 /* This test is only important for leaf functions. */
21472 /* assert (!leaf_function_p ()); */
21474 /* If we have already decided that far jumps may be used,
21475 do not bother checking again, and always return true even if
21476 it turns out that they are not being used. Once we have made
21477 the decision that far jumps are present (and that hence the link
21478 register will be pushed onto the stack) we cannot go back on it. */
21479 if (cfun->machine->far_jump_used)
21482 /* If this function is not being called from the prologue/epilogue
21483 generation code then it must be being called from the
21484 INITIAL_ELIMINATION_OFFSET macro. */
21485 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21487 /* In this case we know that we are being asked about the elimination
21488 of the arg pointer register. If that register is not being used,
21489 then there are no arguments on the stack, and we do not have to
21490 worry that a far jump might force the prologue to push the link
21491 register, changing the stack offsets. In this case we can just
21492 return false, since the presence of far jumps in the function will
21493 not affect stack offsets.
21495 If the arg pointer is live (or if it was live, but has now been
21496 eliminated and so set to dead) then we do have to test to see if
21497 the function might contain a far jump. This test can lead to some
     false negatives, since before reload is completed, the length of
21499 branch instructions is not known, so gcc defaults to returning their
21500 longest length, which in turn sets the far jump attribute to true.
21502 A false negative will not result in bad code being generated, but it
21503 will result in a needless push and pop of the link register. We
21504 hope that this does not occur too often.
21506 If we need doubleword stack alignment this could affect the other
21507 elimination offsets so we can't risk getting it wrong. */
21508 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21509 cfun->machine->arg_pointer_live = 1;
21510 else if (!cfun->machine->arg_pointer_live)
21514 /* Check to see if the function contains a branch
21515 insn with the far jump attribute set. */
21516 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21518 if (GET_CODE (insn) == JUMP_INSN
21519 /* Ignore tablejump patterns. */
21520 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21521 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21522 && get_attr_far_jump (insn) == FAR_JUMP_YES
21525 /* Record the fact that we have decided that
21526 the function does use far jumps. */
21527 cfun->machine->far_jump_used = 1;
21535 /* Return nonzero if FUNC must be entered in ARM mode. */
21537 is_called_in_ARM_mode (tree func)
21539 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
  /* Ignore the problem of functions whose address is taken.  */
21542 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21546 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21552 /* Given the stack offsets and register mask in OFFSETS, decide how
21553 many additional registers to push instead of subtracting a constant
21554 from SP. For epilogues the principle is the same except we use pop.
21555 FOR_PROLOGUE indicates which we're generating. */
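/* For instance (an illustrative sketch): rather than emitting
   "push {r4, lr}" followed by "sub sp, #8", the prologue can emit the
   single instruction "push {r2, r3, r4, lr}" when r2 and r3 hold
   nothing live; the extra pushed words double as the frame
   allocation.  */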
21557 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21559 HOST_WIDE_INT amount;
21560 unsigned long live_regs_mask = offsets->saved_regs_mask;
21561 /* Extract a mask of the ones we can give to the Thumb's push/pop
21563 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21564 /* Then count how many other high registers will need to be pushed. */
21565 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21566 int n_free, reg_base;
21568 if (!for_prologue && frame_pointer_needed)
21569 amount = offsets->locals_base - offsets->saved_regs;
21571 amount = offsets->outgoing_args - offsets->saved_regs;
21573 /* If the stack frame size is 512 exactly, we can save one load
21574 instruction, which should make this a win even when optimizing
21576 if (!optimize_size && amount != 512)
21579 /* Can't do this if there are high registers to push. */
21580 if (high_regs_pushed != 0)
21583 /* Shouldn't do it in the prologue if no registers would normally
21584 be pushed at all. In the epilogue, also allow it if we'll have
21585 a pop insn for the PC. */
21588 || TARGET_BACKTRACE
21589 || (live_regs_mask & 1 << LR_REGNUM) == 0
21590 || TARGET_INTERWORK
21591 || crtl->args.pretend_args_size != 0))
21594 /* Don't do this if thumb_expand_prologue wants to emit instructions
21595 between the push and the stack frame allocation. */
21597 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21598 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21605 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21606 live_regs_mask >>= reg_base;
21609 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21610 && (for_prologue || call_used_regs[reg_base + n_free]))
21612 live_regs_mask >>= 1;
21618 gcc_assert (amount / 4 * 4 == amount);
21620 if (amount >= 512 && (amount - n_free * 4) < 512)
21621 return (amount - 508) / 4;
21622 if (amount <= n_free * 4)
21627 /* The bits which aren't usefully expanded as rtl. */
21629 thumb_unexpanded_epilogue (void)
21631 arm_stack_offsets *offsets;
21633 unsigned long live_regs_mask = 0;
21634 int high_regs_pushed = 0;
21636 int had_to_push_lr;
21639 if (cfun->machine->return_used_this_function != 0)
21642 if (IS_NAKED (arm_current_func_type ()))
21645 offsets = arm_get_frame_offsets ();
21646 live_regs_mask = offsets->saved_regs_mask;
21647 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because
     that will be set if the register is ever used in the function, not
     just if the register is used to hold a return value.  */
21653 size = arm_size_return_regs ();
21655 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21658 unsigned long extra_mask = (1 << extra_pop) - 1;
21659 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
  /* The prologue may have pushed some high registers to use as
21664 work registers. e.g. the testsuite file:
21665 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21666 compiles to produce:
21667 push {r4, r5, r6, r7, lr}
     as part of the prologue.  We have to undo that pushing here.  */
21673 if (high_regs_pushed)
21675 unsigned long mask = live_regs_mask & 0xff;
21678 /* The available low registers depend on the size of the value we are
	  /* Oh dear!  We have no low registers into which we can pop
	     high registers!  */
	  internal_error
	    ("no low registers available for popping high registers");
21691 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21692 if (live_regs_mask & (1 << next_hi_reg))
21695 while (high_regs_pushed)
21697 /* Find lo register(s) into which the high register(s) can
21699 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21701 if (mask & (1 << regno))
21702 high_regs_pushed--;
21703 if (high_regs_pushed == 0)
21707 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21709 /* Pop the values into the low register(s). */
21710 thumb_pop (asm_out_file, mask);
21712 /* Move the value(s) into the high registers. */
21713 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21715 if (mask & (1 << regno))
21717 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21720 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21721 if (live_regs_mask & (1 << next_hi_reg))
21726 live_regs_mask &= ~0x0f00;
21729 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21730 live_regs_mask &= 0xff;
21732 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21734 /* Pop the return address into the PC. */
21735 if (had_to_push_lr)
21736 live_regs_mask |= 1 << PC_REGNUM;
21738 /* Either no argument registers were pushed or a backtrace
21739 structure was created which includes an adjusted stack
21740 pointer, so just pop everything. */
21741 if (live_regs_mask)
21742 thumb_pop (asm_out_file, live_regs_mask);
21744 /* We have either just popped the return address into the
21745 PC or it was kept in LR for the entire function.
21746 Note that thumb_pop has already called thumb_exit if the
21747 PC was in the list. */
21748 if (!had_to_push_lr)
21749 thumb_exit (asm_out_file, LR_REGNUM);
21753 /* Pop everything but the return address. */
21754 if (live_regs_mask)
21755 thumb_pop (asm_out_file, live_regs_mask);
21757 if (had_to_push_lr)
21761 /* We have no free low regs, so save one. */
21762 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21766 /* Get the return address into a temporary register. */
21767 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21771 /* Move the return address to lr. */
21772 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21774 /* Restore the low register. */
21775 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21780 regno = LAST_ARG_REGNUM;
21785 /* Remove the argument registers that were pushed onto the stack. */
21786 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21787 SP_REGNUM, SP_REGNUM,
21788 crtl->args.pretend_args_size);
21790 thumb_exit (asm_out_file, regno);
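/* Editorial illustration of the high-register restore above: Thumb-1
   pop cannot name r8-r11, so if r8 and r9 were saved and r2, r3 are
   free low registers, the emitted sequence is, schematically:
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
   i.e. pop into low registers first, then mov the values up.  */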
21796 /* Functions to save and restore machine-specific function data. */
21797 static struct machine_function *
21798 arm_init_machine_status (void)
21800 struct machine_function *machine;
21801 machine = ggc_alloc_cleared_machine_function ();
21803 #if ARM_FT_UNKNOWN != 0
21804 machine->func_type = ARM_FT_UNKNOWN;
21809 /* Return an RTX indicating where the return address to the
21810 calling function can be found. */
21812 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21817 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21820 /* Do anything needed before RTL is emitted for each function. */
21822 arm_init_expanders (void)
21824 /* Arrange to initialize and mark the machine per-function status. */
21825 init_machine_status = arm_init_machine_status;
21827 /* This is to stop the combine pass optimizing away the alignment
21828 adjustment of va_arg. */
21829 /* ??? It is claimed that this should not be necessary. */
21831 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
21835 /* Like arm_compute_initial_elimination_offset. Simpler because there
21836 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21837 to point at the base of the local variables after static stack
21838 space for a function has been allocated. */
21841 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21843 arm_stack_offsets *offsets;
21845 offsets = arm_get_frame_offsets ();
21849 case ARG_POINTER_REGNUM:
21852 case STACK_POINTER_REGNUM:
21853 return offsets->outgoing_args - offsets->saved_args;
21855 case FRAME_POINTER_REGNUM:
21856 return offsets->soft_frame - offsets->saved_args;
21858 case ARM_HARD_FRAME_POINTER_REGNUM:
21859 return offsets->saved_regs - offsets->saved_args;
21861 case THUMB_HARD_FRAME_POINTER_REGNUM:
21862 return offsets->locals_base - offsets->saved_args;
21865 gcc_unreachable ();
21869 case FRAME_POINTER_REGNUM:
21872 case STACK_POINTER_REGNUM:
21873 return offsets->outgoing_args - offsets->soft_frame;
21875 case ARM_HARD_FRAME_POINTER_REGNUM:
21876 return offsets->saved_regs - offsets->soft_frame;
21878 case THUMB_HARD_FRAME_POINTER_REGNUM:
21879 return offsets->locals_base - offsets->soft_frame;
21882 gcc_unreachable ();
21887 gcc_unreachable ();
21891 /* Generate the function's prologue. */
21894 thumb1_expand_prologue (void)
21898 HOST_WIDE_INT amount;
21899 arm_stack_offsets *offsets;
21900 unsigned long func_type;
21902 unsigned long live_regs_mask;
21903 unsigned long l_mask;
21904 unsigned high_regs_pushed = 0;
21906 func_type = arm_current_func_type ();
21908 /* Naked functions don't have prologues. */
21909 if (IS_NAKED (func_type))
21912 if (IS_INTERRUPT (func_type))
21914 error ("interrupt service routines cannot be coded in Thumb mode");
21918 if (is_called_in_ARM_mode (current_function_decl))
21919 emit_insn (gen_prologue_thumb1_interwork ());
21921 offsets = arm_get_frame_offsets ();
21922 live_regs_mask = offsets->saved_regs_mask;
21924 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21925 l_mask = live_regs_mask & 0x40ff;
21926 /* Then count how many other high registers will need to be pushed. */
21927 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21929 if (crtl->args.pretend_args_size)
21931 rtx x = GEN_INT (-crtl->args.pretend_args_size);
21933 if (cfun->machine->uses_anonymous_args)
21935 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21936 unsigned long mask;
21938 mask = 1ul << (LAST_ARG_REGNUM + 1);
21939 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
21941 insn = thumb1_emit_multi_reg_push (mask, 0);
21945 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21946 stack_pointer_rtx, x));
21948 RTX_FRAME_RELATED_P (insn) = 1;
21951 if (TARGET_BACKTRACE)
21953 HOST_WIDE_INT offset = 0;
21954 unsigned work_register;
21955 rtx work_reg, x, arm_hfp_rtx;
21957 /* We have been asked to create a stack backtrace structure.
21958 The code looks like this:
21962 0 sub SP, #16 Reserve space for 4 registers.
21963 2 push {R7} Push low registers.
21964 4 add R7, SP, #20 Get the stack pointer before the push.
21965 6 str R7, [SP, #8] Store the stack pointer
21966 (before reserving the space).
21967 8 mov R7, PC Get hold of the start of this code + 12.
21968 10 str R7, [SP, #16] Store it.
21969 12 mov R7, FP Get hold of the current frame pointer.
21970 14 str R7, [SP, #4] Store it.
21971 16 mov R7, LR Get hold of the current return address.
21972 18 str R7, [SP, #12] Store it.
21973 20 add R7, SP, #16 Point at the start of the
21974 backtrace structure.
21975 22 mov FP, R7 Put this value into the frame pointer. */
21977 work_register = thumb_find_work_register (live_regs_mask);
21978 work_reg = gen_rtx_REG (SImode, work_register);
21979 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
21981 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21982 stack_pointer_rtx, GEN_INT (-16)));
21983 RTX_FRAME_RELATED_P (insn) = 1;
21987 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
21988 RTX_FRAME_RELATED_P (insn) = 1;
21990 offset = bit_count (l_mask) * UNITS_PER_WORD;
21993 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
21994 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21996 x = plus_constant (stack_pointer_rtx, offset + 4);
21997 x = gen_frame_mem (SImode, x);
21998 emit_move_insn (x, work_reg);
22000 /* Make sure that the instruction fetching the PC is in the right place
22001 to calculate "start of backtrace creation code + 12". */
22002 /* ??? The stores using the common WORK_REG ought to be enough to
22003 prevent the scheduler from doing anything weird. Failing that
22004 we could always move all of the following into an UNSPEC_VOLATILE. */
22007 x = gen_rtx_REG (SImode, PC_REGNUM);
22008 emit_move_insn (work_reg, x);
22010 x = plus_constant (stack_pointer_rtx, offset + 12);
22011 x = gen_frame_mem (SImode, x);
22012 emit_move_insn (x, work_reg);
22014 emit_move_insn (work_reg, arm_hfp_rtx);
22016 x = plus_constant (stack_pointer_rtx, offset);
22017 x = gen_frame_mem (SImode, x);
22018 emit_move_insn (x, work_reg);
22022 emit_move_insn (work_reg, arm_hfp_rtx);
22024 x = plus_constant (stack_pointer_rtx, offset);
22025 x = gen_frame_mem (SImode, x);
22026 emit_move_insn (x, work_reg);
22028 x = gen_rtx_REG (SImode, PC_REGNUM);
22029 emit_move_insn (work_reg, x);
22031 x = plus_constant (stack_pointer_rtx, offset + 12);
22032 x = gen_frame_mem (SImode, x);
22033 emit_move_insn (x, work_reg);
22036 x = gen_rtx_REG (SImode, LR_REGNUM);
22037 emit_move_insn (work_reg, x);
22039 x = plus_constant (stack_pointer_rtx, offset + 8);
22040 x = gen_frame_mem (SImode, x);
22041 emit_move_insn (x, work_reg);
22043 x = GEN_INT (offset + 12);
22044 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22046 emit_move_insn (arm_hfp_rtx, work_reg);
22048 /* Optimization: If we are not pushing any low registers but we are going
22049 to push some high registers then delay our first push. This will just
22050 be a push of LR and we can combine it with the push of the first high register. */
22052 else if ((l_mask & 0xff) != 0
22053 || (high_regs_pushed == 0 && l_mask))
22055 unsigned long mask = l_mask;
22056 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22057 insn = thumb1_emit_multi_reg_push (mask, mask);
22058 RTX_FRAME_RELATED_P (insn) = 1;
22061 if (high_regs_pushed)
22063 unsigned pushable_regs;
22064 unsigned next_hi_reg;
22066 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22067 if (live_regs_mask & (1 << next_hi_reg))
22070 pushable_regs = l_mask & 0xff;
22072 if (pushable_regs == 0)
22073 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22075 while (high_regs_pushed > 0)
22077 unsigned long real_regs_mask = 0;
22079 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22081 if (pushable_regs & (1 << regno))
22083 emit_move_insn (gen_rtx_REG (SImode, regno),
22084 gen_rtx_REG (SImode, next_hi_reg));
22086 high_regs_pushed --;
22087 real_regs_mask |= (1 << next_hi_reg);
22089 if (high_regs_pushed)
22091 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22093 if (live_regs_mask & (1 << next_hi_reg))
22098 pushable_regs &= ~((1 << regno) - 1);
22104 /* If we had to find a work register and we have not yet
22105 saved the LR then add it to the list of regs to push. */
22106 if (l_mask == (1 << LR_REGNUM))
22108 pushable_regs |= l_mask;
22109 real_regs_mask |= l_mask;
22113 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22114 RTX_FRAME_RELATED_P (insn) = 1;
22118 /* Load the pic register before setting the frame pointer,
22119 so we can use r7 as a temporary work register. */
22120 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22121 arm_load_pic_register (live_regs_mask);
22123 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22124 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22125 stack_pointer_rtx);
22127 if (flag_stack_usage_info)
22128 current_function_static_stack_size
22129 = offsets->outgoing_args - offsets->saved_args;
22131 amount = offsets->outgoing_args - offsets->saved_regs;
22132 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22137 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22138 GEN_INT (- amount)));
22139 RTX_FRAME_RELATED_P (insn) = 1;
22145 /* The stack decrement is too big for an immediate value in a single
22146 insn. In theory we could issue multiple subtracts, but after
22147 three of them it becomes more space efficient to place the full
22148 value in the constant pool and load into a register. (Also the
22149 ARM debugger really likes to see only one stack decrement per
22150 function). So instead we look for a scratch register into which
22151 we can load the decrement, and then we subtract this from the
22152 stack pointer. Unfortunately on the thumb the only available
22153 scratch registers are the argument registers, and we cannot use
22154 these as they may hold arguments to the function. Instead we
22155 attempt to locate a call preserved register which is used by this
22156 function. If we can find one, then we know that it will have
22157 been pushed at the start of the prologue and so we can corrupt it now. */
22159 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22160 if (live_regs_mask & (1 << regno))
22163 gcc_assert(regno <= LAST_LO_REGNUM);
22165 reg = gen_rtx_REG (SImode, regno);
22167 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22169 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22170 stack_pointer_rtx, reg));
22172 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22173 plus_constant (stack_pointer_rtx,
22175 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22176 RTX_FRAME_RELATED_P (insn) = 1;
22180 if (frame_pointer_needed)
22181 thumb_set_frame_pointer (offsets);
22183 /* If we are profiling, make sure no instructions are scheduled before
22184 the call to mcount. Similarly if the user has requested no
22185 scheduling in the prolog. Similarly if we want non-call exceptions
22186 using the EABI unwinder, to prevent faulting instructions from being
22187 swapped with a stack adjustment. */
22188 if (crtl->profile || !TARGET_SCHED_PROLOG
22189 || (arm_except_unwind_info (&global_options) == UI_TARGET
22190 && cfun->can_throw_non_call_exceptions))
22191 emit_insn (gen_blockage ());
22193 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22194 if (live_regs_mask & 0xff)
22195 cfun->machine->lr_save_eliminated = 0;
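/* Editorial illustration of the large-decrement path above: for a
   hypothetical frame of 4096 bytes with r4 live (and therefore saved),
   the prologue is lowered roughly to
	push	{r4, lr}
	ldr	r4, .LCx	@ .LCx: .word -4096  (label name invented)
	add	sp, sp, r4
   r4 may be corrupted here because the epilogue's pop restores it.  */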
22200 thumb1_expand_epilogue (void)
22202 HOST_WIDE_INT amount;
22203 arm_stack_offsets *offsets;
22206 /* Naked functions don't have epilogues. */
22207 if (IS_NAKED (arm_current_func_type ()))
22210 offsets = arm_get_frame_offsets ();
22211 amount = offsets->outgoing_args - offsets->saved_regs;
22213 if (frame_pointer_needed)
22215 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22216 amount = offsets->locals_base - offsets->saved_regs;
22218 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22220 gcc_assert (amount >= 0);
22223 emit_insn (gen_blockage ());
22226 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22227 GEN_INT (amount)));
22230 /* r3 is always free in the epilogue. */
22231 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22233 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22234 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22238 /* Emit a USE (stack_pointer_rtx), so that
22239 the stack adjustment will not be deleted. */
22240 emit_insn (gen_prologue_use (stack_pointer_rtx));
22242 if (crtl->profile || !TARGET_SCHED_PROLOG)
22243 emit_insn (gen_blockage ());
22245 /* Emit a clobber for each insn that will be restored in the epilogue,
22246 so that flow2 will get register lifetimes correct. */
22247 for (regno = 0; regno < 13; regno++)
22248 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22249 emit_clobber (gen_rtx_REG (SImode, regno));
22251 if (! df_regs_ever_live_p (LR_REGNUM))
22252 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22255 /* Implementation of insn prologue_thumb1_interwork. This is the first
22256 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22259 thumb1_output_interwork (void)
22262 FILE *f = asm_out_file;
22264 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22265 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
== SYMBOL_REF);
22267 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22269 /* Generate code sequence to switch us into Thumb mode. */
22270 /* The .code 32 directive has already been emitted by
22271 ASM_DECLARE_FUNCTION_NAME. */
22272 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22273 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22275 /* Generate a label, so that the debugger will notice the
22276 change in instruction sets. This label is also used by
22277 the assembler to bypass the ARM code when this function
22278 is called from a Thumb encoded function elsewhere in the
22279 same file. Hence the definition of STUB_NAME here must
22280 agree with the definition in gas/config/tc-arm.c. */
22282 #define STUB_NAME ".real_start_of"
22284 fprintf (f, "\t.code\t16\n");
22286 if (arm_dllexport_name_p (name))
22287 name = arm_strip_name_encoding (name);
22289 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22290 fprintf (f, "\t.thumb_func\n");
22291 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
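/* Editorial sketch of the complete stub this emits for a function
   `foo' (modulo the %U user-label prefix; the .code 32 part comes
   from ASM_DECLARE_FUNCTION_NAME):
	.code	32
   foo:	orr	ip, pc, #1	@ pc reads as foo + 8; set the Thumb bit
	bx	ip		@ jump into the Thumb body
	.code	16
	.globl	.real_start_of_foo
	.thumb_func
   .real_start_of_foo:
	...			@ Thumb body follows
   Thumb callers in the same file branch to .real_start_of_foo.  */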
22296 /* Handle the case of a double word load into a low register from
22297 a computed memory address. The computed address may involve a
22298 register which is overwritten by the load. */
22300 thumb_load_double_from_address (rtx *operands)
22308 gcc_assert (GET_CODE (operands[0]) == REG);
22309 gcc_assert (GET_CODE (operands[1]) == MEM);
22311 /* Get the memory address. */
22312 addr = XEXP (operands[1], 0);
22314 /* Work out how the memory address is computed. */
22315 switch (GET_CODE (addr))
22318 operands[2] = adjust_address (operands[1], SImode, 4);
22320 if (REGNO (operands[0]) == REGNO (addr))
22322 output_asm_insn ("ldr\t%H0, %2", operands);
22323 output_asm_insn ("ldr\t%0, %1", operands);
22327 output_asm_insn ("ldr\t%0, %1", operands);
22328 output_asm_insn ("ldr\t%H0, %2", operands);
22333 /* Compute <address> + 4 for the high order load. */
22334 operands[2] = adjust_address (operands[1], SImode, 4);
22336 output_asm_insn ("ldr\t%0, %1", operands);
22337 output_asm_insn ("ldr\t%H0, %2", operands);
22341 arg1 = XEXP (addr, 0);
22342 arg2 = XEXP (addr, 1);
22344 if (CONSTANT_P (arg1))
22345 base = arg2, offset = arg1;
22347 base = arg1, offset = arg2;
22349 gcc_assert (GET_CODE (base) == REG);
22351 /* Catch the case of <address> = <reg> + <reg> */
22352 if (GET_CODE (offset) == REG)
22354 int reg_offset = REGNO (offset);
22355 int reg_base = REGNO (base);
22356 int reg_dest = REGNO (operands[0]);
22358 /* Add the base and offset registers together into the
22359 higher destination register. */
22360 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r\n",
22361 reg_dest + 1, reg_base, reg_offset);
22363 /* Load the lower destination register from the address in
22364 the higher destination register. */
22365 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]\n",
22366 reg_dest, reg_dest + 1);
22368 /* Load the higher destination register from its own address
22370 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]\n",
22371 reg_dest + 1, reg_dest + 1);
22375 /* Compute <address> + 4 for the high order load. */
22376 operands[2] = adjust_address (operands[1], SImode, 4);
22378 /* If the computed address is held in the low order register
22379 then load the high order register first, otherwise always
22380 load the low order register first. */
22381 if (REGNO (operands[0]) == REGNO (base))
22383 output_asm_insn ("ldr\t%H0, %2", operands);
22384 output_asm_insn ("ldr\t%0, %1", operands);
22388 output_asm_insn ("ldr\t%0, %1", operands);
22389 output_asm_insn ("ldr\t%H0, %2", operands);
22395 /* With no registers to worry about we can just load the value
22397 operands[2] = adjust_address (operands[1], SImode, 4);
22399 output_asm_insn ("ldr\t%H0, %2", operands);
22400 output_asm_insn ("ldr\t%0, %1", operands);
22404 gcc_unreachable ();
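/* Editorial example of the overlap hazard handled above: loading a
   DImode value into r2/r3 from the address r2 + r4.  The base r2 is
   also the low destination, so the full address is formed in the high
   destination first:
	add	r3, r2, r4
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]
   Neither load then clobbers a register that is still needed.  */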
22411 thumb_output_move_mem_multiple (int n, rtx *operands)
22418 if (REGNO (operands[4]) > REGNO (operands[5]))
22421 operands[4] = operands[5];
22424 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22425 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22429 if (REGNO (operands[4]) > REGNO (operands[5]))
22432 operands[4] = operands[5];
22435 if (REGNO (operands[5]) > REGNO (operands[6]))
22438 operands[5] = operands[6];
22441 if (REGNO (operands[4]) > REGNO (operands[5]))
22444 operands[4] = operands[5];
22448 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22449 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22453 gcc_unreachable ();
22459 /* Output a call-via instruction for thumb state. */
22461 thumb_call_via_reg (rtx reg)
22463 int regno = REGNO (reg);
22466 gcc_assert (regno < LR_REGNUM);
22468 /* If we are in the normal text section we can use a single instance
22469 per compilation unit. If we are doing function sections, then we need
22470 an entry per section, since we can't rely on reachability. */
22471 if (in_section == text_section)
22473 thumb_call_reg_needed = 1;
22475 if (thumb_call_via_label[regno] == NULL)
22476 thumb_call_via_label[regno] = gen_label_rtx ();
22477 labelp = thumb_call_via_label + regno;
22481 if (cfun->machine->call_via[regno] == NULL)
22482 cfun->machine->call_via[regno] = gen_label_rtx ();
22483 labelp = cfun->machine->call_via + regno;
22486 output_asm_insn ("bl\t%a0", labelp);
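/* Editorial illustration: a call through r3 assembles here to
	bl	.L42		@ label name invented
   and arm_file_end (or a per-section copy, when function sections are
   in use) later emits the shared stub
   .L42:	bx	r3
   so every indirect call via r3 reuses one bx instruction.  */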
22490 /* Routines for generating rtl. */
22492 thumb_expand_movmemqi (rtx *operands)
22494 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22495 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22496 HOST_WIDE_INT len = INTVAL (operands[2]);
22497 HOST_WIDE_INT offset = 0;
22501 emit_insn (gen_movmem12b (out, in, out, in));
22507 emit_insn (gen_movmem8b (out, in, out, in));
22513 rtx reg = gen_reg_rtx (SImode);
22514 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22515 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22522 rtx reg = gen_reg_rtx (HImode);
22523 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22524 plus_constant (in, offset))));
22525 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22533 rtx reg = gen_reg_rtx (QImode);
22534 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22535 plus_constant (in, offset))));
22536 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
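/* Editorial sketch (hypothetical helper, not GCC code): the chunk
   decomposition performed above, written out over just the length.  */
#if 0
static long
thumb_movmem_chunks_sketch (long len)
{
  long insns = 0;
  while (len >= 12) { len -= 12; insns++; }	/* movmem12b: 3-word ldmia/stmia */
  if (len >= 8)	    { len -= 8;	 insns++; }	/* movmem8b: 2-word ldmia/stmia */
  if (len >= 4)	    { len -= 4;	 insns++; }	/* one SImode load/store pair */
  if (len >= 2)	    { len -= 2;	 insns++; }	/* one HImode pair */
  if (len >= 1)	    { len -= 1;	 insns++; }	/* one QImode pair */
  return insns;
}
#endif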
22542 thumb_reload_out_hi (rtx *operands)
22544 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22547 /* Handle reading a half-word from memory during reload. */
22549 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22551 gcc_unreachable ();
22554 /* Return the length of a function name prefix
22555 that starts with the character 'c'. */
22557 arm_get_strip_length (int c)
22561 ARM_NAME_ENCODING_LENGTHS
22566 /* Return a pointer to a function's name with any
22567 and all prefix encodings stripped from it. */
22569 arm_strip_name_encoding (const char *name)
22573 while ((skip = arm_get_strip_length (* name)))
22579 /* If there is a '*' anywhere in the name's prefix, then
22580 emit the stripped name verbatim, otherwise prepend an
22581 underscore if leading underscores are being used. */
22583 arm_asm_output_labelref (FILE *stream, const char *name)
22588 while ((skip = arm_get_strip_length (* name)))
22590 verbatim |= (*name == '*');
22595 fputs (name, stream);
22597 asm_fprintf (stream, "%U%s", name);
22601 arm_file_start (void)
22605 if (TARGET_UNIFIED_ASM)
22606 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22610 const char *fpu_name;
22611 if (arm_selected_arch)
22612 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22613 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22614 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22616 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22618 if (TARGET_SOFT_FLOAT)
22621 fpu_name = "softvfp";
22623 fpu_name = "softfpa";
22627 fpu_name = arm_fpu_desc->name;
22628 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22630 if (TARGET_HARD_FLOAT)
22631 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22632 if (TARGET_HARD_FLOAT_ABI)
22633 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22636 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22638 /* Some of these attributes only apply when the corresponding features
22639 are used. However we don't have any easy way of figuring this out.
22640 Conservatively record the setting that would have been used. */
22642 if (flag_rounding_math)
22643 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22645 if (!flag_unsafe_math_optimizations)
22647 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22648 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22650 if (flag_signaling_nans)
22651 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22653 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22654 flag_finite_math_only ? 1 : 3);
22656 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22657 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22658 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22660 /* Tag_ABI_optimization_goals. */
22663 else if (optimize >= 2)
22669 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22671 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22673 if (arm_fp16_format)
22674 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22676 if (arm_lang_output_object_attributes_hook)
22677 arm_lang_output_object_attributes_hook();
22680 default_file_start ();
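/* Editorial example of the preamble this produces for a hypothetical
   -mcpu=cortex-a8 -mfpu=neon EABI compile with default math flags
   (exact attribute values depend on the options in force):
	.syntax	unified		@ when TARGET_UNIFIED_ASM
	.cpu	cortex-a8
	.fpu	neon
	.eabi_attribute	20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute	21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute	23, 3	@ Tag_ABI_FP_number_model
	.eabi_attribute	24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute	25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute	26, 2	@ Tag_ABI_enum_size  */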
22684 arm_file_end (void)
22688 if (NEED_INDICATE_EXEC_STACK)
22689 /* Add .note.GNU-stack. */
22690 file_end_indicate_exec_stack ();
22692 if (! thumb_call_reg_needed)
22695 switch_to_section (text_section);
22696 asm_fprintf (asm_out_file, "\t.code 16\n");
22697 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22699 for (regno = 0; regno < LR_REGNUM; regno++)
22701 rtx label = thumb_call_via_label[regno];
22705 targetm.asm_out.internal_label (asm_out_file, "L",
22706 CODE_LABEL_NUMBER (label));
22707 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22713 /* Symbols in the text segment can be accessed without indirecting via the
22714 constant pool; it may take an extra binary operation, but this is still
22715 faster than indirecting via memory. Don't do this when not optimizing,
22716 since we won't be calculating all of the offsets necessary to do this anyway. */
22720 arm_encode_section_info (tree decl, rtx rtl, int first)
22722 if (optimize > 0 && TREE_CONSTANT (decl))
22723 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22725 default_encode_section_info (decl, rtl, first);
22727 #endif /* !ARM_PE */
22730 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22732 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22733 && !strcmp (prefix, "L"))
22735 arm_ccfsm_state = 0;
22736 arm_target_insn = NULL;
22738 default_internal_label (stream, prefix, labelno);
22741 /* Output code to add DELTA to the first argument, and then jump
22742 to FUNCTION. Used for C++ multiple inheritance. */
22744 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22745 HOST_WIDE_INT delta,
22746 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22749 static int thunk_label = 0;
22752 int mi_delta = delta;
22753 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22755 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
? 1 : 0);
22758 mi_delta = - mi_delta;
22762 int labelno = thunk_label++;
22763 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22764 /* Thunks are entered in ARM mode when available. */
22765 if (TARGET_THUMB1_ONLY)
22767 /* push r3 so we can use it as a temporary. */
22768 /* TODO: Omit this save if r3 is not used. */
22769 fputs ("\tpush {r3}\n", file);
22770 fputs ("\tldr\tr3, ", file);
22774 fputs ("\tldr\tr12, ", file);
22776 assemble_name (file, label);
22777 fputc ('\n', file);
22780 /* If we are generating PIC, the ldr instruction below loads
22781 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22782 the address of the add + 8, so we have:
22784 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) = target + 1.
22787 Note that we have "+ 1" because some versions of GNU ld
22788 don't set the low bit of the result for R_ARM_REL32
22789 relocations against thumb function symbols.
22790 On ARMv6M this is +4, not +8. */
22791 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22792 assemble_name (file, labelpc);
22793 fputs (":\n", file);
22794 if (TARGET_THUMB1_ONLY)
22796 /* This is 2 insns after the start of the thunk, so we know it
22797 is 4-byte aligned. */
22798 fputs ("\tadd\tr3, pc, r3\n", file);
22799 fputs ("\tmov r12, r3\n", file);
22802 fputs ("\tadd\tr12, pc, r12\n", file);
22804 else if (TARGET_THUMB1_ONLY)
22805 fputs ("\tmov r12, r3\n", file);
22807 if (TARGET_THUMB1_ONLY)
22809 if (mi_delta > 255)
22811 fputs ("\tldr\tr3, ", file);
22812 assemble_name (file, label);
22813 fputs ("+4\n", file);
22814 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22815 mi_op, this_regno, this_regno);
22817 else if (mi_delta != 0)
22819 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22820 mi_op, this_regno, this_regno,
22826 /* TODO: Use movw/movt for large constants when available. */
22827 while (mi_delta != 0)
22829 if ((mi_delta & (3 << shift)) == 0)
22833 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22834 mi_op, this_regno, this_regno,
22835 mi_delta & (0xff << shift));
22836 mi_delta &= ~(0xff << shift);
22843 if (TARGET_THUMB1_ONLY)
22844 fputs ("\tpop\t{r3}\n", file);
22846 fprintf (file, "\tbx\tr12\n");
22847 ASM_OUTPUT_ALIGN (file, 2);
22848 assemble_name (file, label);
22849 fputs (":\n", file);
22852 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22853 rtx tem = XEXP (DECL_RTL (function), 0);
22854 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22855 tem = gen_rtx_MINUS (GET_MODE (tem),
22857 gen_rtx_SYMBOL_REF (Pmode,
22858 ggc_strdup (labelpc)));
22859 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22862 /* Output ".word .LTHUNKn". */
22863 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
22865 if (TARGET_THUMB1_ONLY && mi_delta > 255)
22866 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
22870 fputs ("\tb\t", file);
22871 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
22872 if (NEED_PLT_RELOC)
22873 fputs ("(PLT)", file);
22874 fputc ('\n', file);
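/* Editorial example: a non-PIC, ARM-mode thunk with DELTA == 8 for a
   function whose result is returned in registers reduces to just
	add	r0, r0, #8
	b	target(PLT)	@ (PLT) only when NEED_PLT_RELOC
   with r1 adjusted instead when the result is returned in memory
   (r0 then carries the return-slot address).  */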
22879 arm_emit_vector_const (FILE *file, rtx x)
22882 const char * pattern;
22884 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22886 switch (GET_MODE (x))
22888 case V2SImode: pattern = "%08x"; break;
22889 case V4HImode: pattern = "%04x"; break;
22890 case V8QImode: pattern = "%02x"; break;
22891 default: gcc_unreachable ();
22894 fprintf (file, "0x");
22895 for (i = CONST_VECTOR_NUNITS (x); i--;)
22899 element = CONST_VECTOR_ELT (x, i);
22900 fprintf (file, pattern, INTVAL (element));
22906 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
22907 HFmode constant pool entries are actually loaded with ldr. */
22909 arm_emit_fp16_const (rtx c)
22914 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
22915 bits = real_to_target (NULL, &r, HFmode);
22916 if (WORDS_BIG_ENDIAN)
22917 assemble_zeros (2);
22918 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
22919 if (!WORDS_BIG_ENDIAN)
22920 assemble_zeros (2);
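/* Editorial example: the HFmode constant 1.0 encodes as the binary16
   pattern 0x3c00, so on a little-endian target the pool entry is
   roughly
	.short	0x3c00
	.short	0
   with the padding preceding the value instead when WORDS_BIG_ENDIAN,
   keeping the entry the ldr reads word-sized and aligned.  */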
22924 arm_output_load_gr (rtx *operands)
22931 if (GET_CODE (operands [1]) != MEM
22932 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
22933 || GET_CODE (reg = XEXP (sum, 0)) != REG
22934 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
22935 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
22936 return "wldrw%?\t%0, %1";
22938 /* Fix up an out-of-range load of a GR register. */
22939 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
22940 wcgr = operands[0];
22942 output_asm_insn ("ldr%?\t%0, %1", operands);
22944 operands[0] = wcgr;
22946 output_asm_insn ("tmcr%?\t%0, %1", operands);
22947 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
22952 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
22954 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
22955 named arg and all anonymous args onto the stack.
22956 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
22960 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
22961 enum machine_mode mode,
22964 int second_time ATTRIBUTE_UNUSED)
22966 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
22969 cfun->machine->uses_anonymous_args = 1;
22970 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
22972 nregs = pcum->aapcs_ncrn;
22973 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
22977 nregs = pcum->nregs;
22979 if (nregs < NUM_ARG_REGS)
22980 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
22983 /* Return nonzero if the CONSUMER instruction (a store) does not need
22984 PRODUCER's value to calculate the address. */
22987 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
22989 rtx value = PATTERN (producer);
22990 rtx addr = PATTERN (consumer);
22992 if (GET_CODE (value) == COND_EXEC)
22993 value = COND_EXEC_CODE (value);
22994 if (GET_CODE (value) == PARALLEL)
22995 value = XVECEXP (value, 0, 0);
22996 value = XEXP (value, 0);
22997 if (GET_CODE (addr) == COND_EXEC)
22998 addr = COND_EXEC_CODE (addr);
22999 if (GET_CODE (addr) == PARALLEL)
23000 addr = XVECEXP (addr, 0, 0);
23001 addr = XEXP (addr, 0);
23003 return !reg_overlap_mentioned_p (value, addr);
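/* Editorial example: with
	producer:  (set (reg:SI r3) (plus:SI (reg:SI r1) (reg:SI r2)))
	consumer:  (set (mem:SI (reg:SI r3)) (reg:SI r0))
   the store's address mentions r3, so this returns zero; if the
   consumer instead stored r3 through an address built from other
   registers, only the stored value would depend on the producer and
   this would return nonzero.  */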
23006 /* Return nonzero if the CONSUMER instruction (a store) does need
23007 PRODUCER's value to calculate the address. */
23010 arm_early_store_addr_dep (rtx producer, rtx consumer)
23012 return !arm_no_early_store_addr_dep (producer, consumer);
23015 /* Return nonzero if the CONSUMER instruction (a load) does need
23016 PRODUCER's value to calculate the address. */
23019 arm_early_load_addr_dep (rtx producer, rtx consumer)
23021 rtx value = PATTERN (producer);
23022 rtx addr = PATTERN (consumer);
23024 if (GET_CODE (value) == COND_EXEC)
23025 value = COND_EXEC_CODE (value);
23026 if (GET_CODE (value) == PARALLEL)
23027 value = XVECEXP (value, 0, 0);
23028 value = XEXP (value, 0);
23029 if (GET_CODE (addr) == COND_EXEC)
23030 addr = COND_EXEC_CODE (addr);
23031 if (GET_CODE (addr) == PARALLEL)
23032 addr = XVECEXP (addr, 0, 0);
23033 addr = XEXP (addr, 1);
23035 return reg_overlap_mentioned_p (value, addr);
23038 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23039 have an early register shift value or amount dependency on the
23040 result of PRODUCER. */
23043 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23045 rtx value = PATTERN (producer);
23046 rtx op = PATTERN (consumer);
23049 if (GET_CODE (value) == COND_EXEC)
23050 value = COND_EXEC_CODE (value);
23051 if (GET_CODE (value) == PARALLEL)
23052 value = XVECEXP (value, 0, 0);
23053 value = XEXP (value, 0);
23054 if (GET_CODE (op) == COND_EXEC)
23055 op = COND_EXEC_CODE (op);
23056 if (GET_CODE (op) == PARALLEL)
23057 op = XVECEXP (op, 0, 0);
23060 early_op = XEXP (op, 0);
23061 /* This is either an actual independent shift, or a shift applied to
23062 the first operand of another operation. We want the whole shift operation. */
23064 if (GET_CODE (early_op) == REG)
23067 return !reg_overlap_mentioned_p (value, early_op);
23070 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23071 have an early register shift value dependency on the result of
23075 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23077 rtx value = PATTERN (producer);
23078 rtx op = PATTERN (consumer);
23081 if (GET_CODE (value) == COND_EXEC)
23082 value = COND_EXEC_CODE (value);
23083 if (GET_CODE (value) == PARALLEL)
23084 value = XVECEXP (value, 0, 0);
23085 value = XEXP (value, 0);
23086 if (GET_CODE (op) == COND_EXEC)
23087 op = COND_EXEC_CODE (op);
23088 if (GET_CODE (op) == PARALLEL)
23089 op = XVECEXP (op, 0, 0);
23092 early_op = XEXP (op, 0);
23094 /* This is either an actual independent shift, or a shift applied to
23095 the first operand of another operation. We want the value being
23096 shifted, in either case. */
23097 if (GET_CODE (early_op) != REG)
23098 early_op = XEXP (early_op, 0);
23100 return !reg_overlap_mentioned_p (value, early_op);
23103 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23104 have an early register mult dependency on the result of
23108 arm_no_early_mul_dep (rtx producer, rtx consumer)
23110 rtx value = PATTERN (producer);
23111 rtx op = PATTERN (consumer);
23113 if (GET_CODE (value) == COND_EXEC)
23114 value = COND_EXEC_CODE (value);
23115 if (GET_CODE (value) == PARALLEL)
23116 value = XVECEXP (value, 0, 0);
23117 value = XEXP (value, 0);
23118 if (GET_CODE (op) == COND_EXEC)
23119 op = COND_EXEC_CODE (op);
23120 if (GET_CODE (op) == PARALLEL)
23121 op = XVECEXP (op, 0, 0);
23124 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23126 if (GET_CODE (XEXP (op, 0)) == MULT)
23127 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23129 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23135 /* We can't rely on the caller doing the proper promotion when
23136 using APCS or ATPCS. */
23139 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23141 return !TARGET_AAPCS_BASED;
23144 static enum machine_mode
23145 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23146 enum machine_mode mode,
23147 int *punsignedp ATTRIBUTE_UNUSED,
23148 const_tree fntype ATTRIBUTE_UNUSED,
23149 int for_return ATTRIBUTE_UNUSED)
23151 if (GET_MODE_CLASS (mode) == MODE_INT
23152 && GET_MODE_SIZE (mode) < 4)
return SImode;

return mode;
}
23158 /* AAPCS based ABIs use short enums by default. */
23161 arm_default_short_enums (void)
23163 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23167 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23170 arm_align_anon_bitfield (void)
23172 return TARGET_AAPCS_BASED;
23176 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23179 arm_cxx_guard_type (void)
23181 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23184 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23185 has an accumulator dependency on the result of the producer (a
23186 multiplication instruction) and no other dependency on that result. */
23188 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23190 rtx mul = PATTERN (producer);
23191 rtx mac = PATTERN (consumer);
23193 rtx mac_op0, mac_op1, mac_acc;
23195 if (GET_CODE (mul) == COND_EXEC)
23196 mul = COND_EXEC_CODE (mul);
23197 if (GET_CODE (mac) == COND_EXEC)
23198 mac = COND_EXEC_CODE (mac);
23200 /* Check that mul is of the form (set (...) (mult ...))
23201 and mla is of the form (set (...) (plus (mult ...) (...))). */
23202 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23203 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23204 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23207 mul_result = XEXP (mul, 0);
23208 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23209 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23210 mac_acc = XEXP (XEXP (mac, 1), 1);
23212 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23213 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23214 && !reg_overlap_mentioned_p (mul_result, mac_op1));
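/* Editorial example:
	mul	r5, r2, r3
	mla	r6, r0, r1, r5
   Here r5 feeds the mac only as its accumulator, so this returns
   nonzero (the result can be forwarded down the accumulator path);
   had r5 also appeared as a multiplicand it would return zero.  */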
23218 /* The EABI says test the least significant bit of a guard variable. */
23221 arm_cxx_guard_mask_bit (void)
23223 return TARGET_AAPCS_BASED;
23227 /* The EABI specifies that all array cookies are 8 bytes long. */
23230 arm_get_cookie_size (tree type)
23234 if (!TARGET_AAPCS_BASED)
23235 return default_cxx_get_cookie_size (type);
23237 size = build_int_cst (sizetype, 8);
23242 /* The EABI says that array cookies should also contain the element size. */
23245 arm_cookie_has_size (void)
23247 return TARGET_AAPCS_BASED;
23251 /* The EABI says constructors and destructors should return a pointer to
23252 the object constructed/destroyed. */
23255 arm_cxx_cdtor_returns_this (void)
23257 return TARGET_AAPCS_BASED;
23260 /* The EABI says that an inline function may never be the key
23264 arm_cxx_key_method_may_be_inline (void)
23266 return !TARGET_AAPCS_BASED;
23270 arm_cxx_determine_class_data_visibility (tree decl)
23272 if (!TARGET_AAPCS_BASED
23273 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23276 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23277 is exported. However, on systems without dynamic vague linkage,
23278 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23279 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23280 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23282 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23283 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23287 arm_cxx_class_data_always_comdat (void)
23289 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23290 vague linkage if the class has no key function. */
23291 return !TARGET_AAPCS_BASED;
23295 /* The EABI says __aeabi_atexit should be used to register static
23299 arm_cxx_use_aeabi_atexit (void)
23301 return TARGET_AAPCS_BASED;
23306 arm_set_return_address (rtx source, rtx scratch)
23308 arm_stack_offsets *offsets;
23309 HOST_WIDE_INT delta;
23311 unsigned long saved_regs;
23313 offsets = arm_get_frame_offsets ();
23314 saved_regs = offsets->saved_regs_mask;
23316 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23317 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23320 if (frame_pointer_needed)
23321 addr = plus_constant(hard_frame_pointer_rtx, -4);
23324 /* LR will be the first saved register. */
23325 delta = offsets->outgoing_args - (offsets->frame + 4);
23330 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23331 GEN_INT (delta & ~4095)));
23336 addr = stack_pointer_rtx;
23338 addr = plus_constant (addr, delta);
23340 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23346 thumb_set_return_address (rtx source, rtx scratch)
23348 arm_stack_offsets *offsets;
23349 HOST_WIDE_INT delta;
23350 HOST_WIDE_INT limit;
23353 unsigned long mask;
23357 offsets = arm_get_frame_offsets ();
23358 mask = offsets->saved_regs_mask;
23359 if (mask & (1 << LR_REGNUM))
23362 /* Find the saved regs. */
23363 if (frame_pointer_needed)
23365 delta = offsets->soft_frame - offsets->saved_args;
23366 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23372 delta = offsets->outgoing_args - offsets->saved_args;
23375 /* Allow for the stack frame. */
23376 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23378 /* The link register is always the first saved register. */
23381 /* Construct the address. */
23382 addr = gen_rtx_REG (SImode, reg);
23385 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23386 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23390 addr = plus_constant (addr, delta);
23392 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23395 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23398 /* Implements target hook vector_mode_supported_p. */
23400 arm_vector_mode_supported_p (enum machine_mode mode)
23402 /* Neon also supports V2SImode, etc. listed in the clause below. */
23403 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23404 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23407 if ((TARGET_NEON || TARGET_IWMMXT)
23408 && ((mode == V2SImode)
23409 || (mode == V4HImode)
23410 || (mode == V8QImode)))
23413 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23414 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23415 || mode == V2HAmode))
23421 /* Implements target hook array_mode_supported_p. */
23424 arm_array_mode_supported_p (enum machine_mode mode,
23425 unsigned HOST_WIDE_INT nelems)
23428 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23429 && (nelems >= 2 && nelems <= 4))
23435 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23436 registers when autovectorizing for Neon, at least until multiple vector
23437 widths are supported properly by the middle-end. */
23439 static enum machine_mode
23440 arm_preferred_simd_mode (enum machine_mode mode)
23446 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23448 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23450 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23452 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23454 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23461 if (TARGET_REALLY_IWMMXT)
23477 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23479 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23480 using r0-r4 for function arguments, r7 for the stack frame and don't have
23481 enough left over to do doubleword arithmetic. For Thumb-2 all the
23482 potentially problematic instructions accept high registers so this is not
23483 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23484 that require many low registers. */
23486 arm_class_likely_spilled_p (reg_class_t rclass)
23488 if ((TARGET_THUMB1 && rclass == LO_REGS)
23489 || rclass == CC_REG)
23495 /* Implements target hook small_register_classes_for_mode_p. */
23497 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23499 return TARGET_THUMB1;
23502 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23503 ARM insns and therefore guarantee that the shift count is modulo 256.
23504 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23505 guarantee no particular behavior for out-of-range counts. */
23507 static unsigned HOST_WIDE_INT
23508 arm_shift_truncation_mask (enum machine_mode mode)
23510 return mode == SImode ? 255 : 0;
23514 /* Map internal gcc register numbers to DWARF2 register numbers. */
23517 arm_dbx_register_number (unsigned int regno)
23522 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23523 compatibility. The EABI defines them as registers 96-103. */
23524 if (IS_FPA_REGNUM (regno))
23525 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23527 if (IS_VFP_REGNUM (regno))
23529 /* See comment in arm_dwarf_register_span. */
23530 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23531 return 64 + regno - FIRST_VFP_REGNUM;
23533 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23536 if (IS_IWMMXT_GR_REGNUM (regno))
23537 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23539 if (IS_IWMMXT_REGNUM (regno))
23540 return 112 + regno - FIRST_IWMMXT_REGNUM;
23542 gcc_unreachable ();
23545 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23546 GCC models them as 64 32-bit registers, so we need to describe this to
23547 the DWARF generation code. Other registers can use the default. */
23549 arm_dwarf_register_span (rtx rtl)
23556 regno = REGNO (rtl);
23557 if (!IS_VFP_REGNUM (regno))
23560 /* XXX FIXME: The EABI defines two VFP register ranges:
23561 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) and 256-287: D0-D31.
23563 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23564 corresponding D register. Until GDB supports this, we shall use the
23565 legacy encodings. We also use these encodings for D0-D15 for
23566 compatibility with older debuggers. */
23567 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23570 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23571 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23572 regno = (regno - FIRST_VFP_REGNUM) / 2;
23573 for (i = 0; i < nregs; i++)
23574 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23579 #if ARM_UNWIND_INFO
23580 /* Emit unwind directives for a store-multiple instruction or stack pointer
23581 push during alignment.
23582 These should only ever be generated by the function prologue code, so
23583 expect them to have a particular form. */
23586 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23589 HOST_WIDE_INT offset;
23590 HOST_WIDE_INT nregs;
23596 e = XVECEXP (p, 0, 0);
23597 if (GET_CODE (e) != SET)
23600 /* First insn will adjust the stack pointer. */
23601 if (GET_CODE (e) != SET
23602 || GET_CODE (XEXP (e, 0)) != REG
23603 || REGNO (XEXP (e, 0)) != SP_REGNUM
23604 || GET_CODE (XEXP (e, 1)) != PLUS)
23607 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23608 nregs = XVECLEN (p, 0) - 1;
23610 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23613 /* The function prologue may also push pc, but not annotate it as it is
23614 never restored. We turn this into a stack pointer adjustment. */
23615 if (nregs * 4 == offset - 4)
23617 fprintf (asm_out_file, "\t.pad #4\n");
23621 fprintf (asm_out_file, "\t.save {");
23623 else if (IS_VFP_REGNUM (reg))
23626 fprintf (asm_out_file, "\t.vsave {");
23628 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23630 /* FPA registers are done differently. */
23631 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23635 /* Unknown register type. */
23638 /* If the stack increment doesn't match the size of the saved registers,
23639 something has gone horribly wrong. */
23640 if (offset != nregs * reg_size)
23645 /* The remaining insns will describe the stores. */
23646 for (i = 1; i <= nregs; i++)
23648 /* Expect (set (mem <addr>) (reg)).
23649 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23650 e = XVECEXP (p, 0, i);
23651 if (GET_CODE (e) != SET
23652 || GET_CODE (XEXP (e, 0)) != MEM
23653 || GET_CODE (XEXP (e, 1)) != REG)
23656 reg = REGNO (XEXP (e, 1));
23661 fprintf (asm_out_file, ", ");
23662 /* We can't use %r for vfp because we need to use the
23663 double precision register names. */
23664 if (IS_VFP_REGNUM (reg))
23665 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23667 asm_fprintf (asm_out_file, "%r", reg);
23669 #ifdef ENABLE_CHECKING
23670 /* Check that the addresses are consecutive. */
23671 e = XEXP (XEXP (e, 0), 0);
23672 if (GET_CODE (e) == PLUS)
23674 offset += reg_size;
23675 if (GET_CODE (XEXP (e, 0)) != REG
23676 || REGNO (XEXP (e, 0)) != SP_REGNUM
23677 || GET_CODE (XEXP (e, 1)) != CONST_INT
23678 || offset != INTVAL (XEXP (e, 1)))
23682 || GET_CODE (e) != REG
23683 || REGNO (e) != SP_REGNUM)
23687 fprintf (asm_out_file, "}\n");
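/* Editorial example: a prologue "push {r4, r5, lr}" (a PARALLEL of the
   sp decrement plus three stores) is annotated as
	.save	{r4, r5, lr}
   and a "vpush {d8, d9}" as
	.vsave	{d8, d9}
   while an extra unrestored pc slot becomes an additional .pad #4.  */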
23690 /* Emit unwind directives for a SET. */
23693 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23701 switch (GET_CODE (e0))
23704 /* Pushing a single register. */
23705 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23706 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23707 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23710 asm_fprintf (asm_out_file, "\t.save ");
23711 if (IS_VFP_REGNUM (REGNO (e1)))
23712 asm_fprintf(asm_out_file, "{d%d}\n",
23713 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23715 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23719 if (REGNO (e0) == SP_REGNUM)
23721 /* A stack increment. */
23722 if (GET_CODE (e1) != PLUS
23723 || GET_CODE (XEXP (e1, 0)) != REG
23724 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23725 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23728 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23729 -INTVAL (XEXP (e1, 1)));
23731 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23733 HOST_WIDE_INT offset;
23735 if (GET_CODE (e1) == PLUS)
23737 if (GET_CODE (XEXP (e1, 0)) != REG
23738 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23740 reg = REGNO (XEXP (e1, 0));
23741 offset = INTVAL (XEXP (e1, 1));
23742 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23743 HARD_FRAME_POINTER_REGNUM, reg,
23746 else if (GET_CODE (e1) == REG)
23749 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23750 HARD_FRAME_POINTER_REGNUM, reg);
23755 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23757 /* Move from sp to reg. */
23758 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23760 else if (GET_CODE (e1) == PLUS
23761 && GET_CODE (XEXP (e1, 0)) == REG
23762 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23763 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23765 /* Set reg to offset from sp. */
23766 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23767 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
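/* Editorial summary of the mappings above, for common prologue SETs
   (register numbers invented for illustration):
	sub	sp, sp, #16	->	.pad	#16
	add	r7, sp, #8	->	.setfp	r7, sp, #8
	mov	r7, sp		->	.setfp	r7, sp
	mov	r4, sp		->	.movsp	r4
	add	r4, sp, #8	->	.movsp	r4, #8  */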
23779 /* Emit unwind directives for the given insn. */
23782 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23785 bool handled_one = false;
23787 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23790 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23791 && (TREE_NOTHROW (current_function_decl)
23792 || crtl->all_throwers_are_sibcalls))
23795 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23798 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23800 pat = XEXP (note, 0);
23801 switch (REG_NOTE_KIND (note))
23803 case REG_FRAME_RELATED_EXPR:
23806 case REG_CFA_REGISTER:
23809 pat = PATTERN (insn);
23810 if (GET_CODE (pat) == PARALLEL)
23811 pat = XVECEXP (pat, 0, 0);
23814 /* Only emitted for IS_STACKALIGN re-alignment. */
23819 src = SET_SRC (pat);
23820 dest = SET_DEST (pat);
23822 gcc_assert (src == stack_pointer_rtx);
23823 reg = REGNO (dest);
23824 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23827 handled_one = true;
23830 case REG_CFA_DEF_CFA:
23831 case REG_CFA_EXPRESSION:
23832 case REG_CFA_ADJUST_CFA:
23833 case REG_CFA_OFFSET:
23834 /* ??? Only handling here what we actually emit. */
23835 gcc_unreachable ();
23843 pat = PATTERN (insn);
23846 switch (GET_CODE (pat))
23849 arm_unwind_emit_set (asm_out_file, pat);
23853 /* Store multiple. */
23854 arm_unwind_emit_sequence (asm_out_file, pat);
23863 /* Output a reference from a function exception table to the type_info
23864 object X. The EABI specifies that the symbol should be relocated by
23865 an R_ARM_TARGET2 relocation. */
23868 arm_output_ttype (rtx x)
23870 fputs ("\t.word\t", asm_out_file);
23871 output_addr_const (asm_out_file, x);
23872 /* Use special relocations for symbol references. */
23873 if (GET_CODE (x) != CONST_INT)
23874 fputs ("(TARGET2)", asm_out_file);
23875 fputc ('\n', asm_out_file);
23880 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23883 arm_asm_emit_except_personality (rtx personality)
23885 fputs ("\t.personality\t", asm_out_file);
23886 output_addr_const (asm_out_file, personality);
23887 fputc ('\n', asm_out_file);
23890 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23893 arm_asm_init_sections (void)
23895 exception_section = get_unnamed_section (0, output_section_asm_op,
23898 #endif /* ARM_UNWIND_INFO */
23900 /* Output unwind directives for the start/end of a function. */
23903 arm_output_fn_unwind (FILE * f, bool prologue)
23905 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23909 fputs ("\t.fnstart\n", f);
23912 /* If this function will never be unwound, then mark it as such.
23913 The same condition is used in arm_unwind_emit to suppress
23914 the frame annotations. */
23915 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23916 && (TREE_NOTHROW (current_function_decl)
23917 || crtl->all_throwers_are_sibcalls))
23918 fputs("\t.cantunwind\n", f);
23920 fputs ("\t.fnend\n", f);
23925 arm_emit_tls_decoration (FILE *fp, rtx x)
23927 enum tls_reloc reloc;
23930 val = XVECEXP (x, 0, 0);
23931 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
23933 output_addr_const (fp, val);
23938 fputs ("(tlsgd)", fp);
23941 fputs ("(tlsldm)", fp);
23944 fputs ("(tlsldo)", fp);
23947 fputs ("(gottpoff)", fp);
23950 fputs ("(tpoff)", fp);
23953 fputs ("(tlsdesc)", fp);
23956 gcc_unreachable ();
23965 fputs (" + (. - ", fp);
23966 output_addr_const (fp, XVECEXP (x, 0, 2));
23967 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
23968 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
23969 output_addr_const (fp, XVECEXP (x, 0, 3));
23979 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
23982 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
23984 gcc_assert (size == 4);
23985 fputs ("\t.word\t", file);
23986 output_addr_const (file, x);
23987 fputs ("(tlsldo)", file);
23990 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
23993 arm_output_addr_const_extra (FILE *fp, rtx x)
23995 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
23996 return arm_emit_tls_decoration (fp, x);
23997 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24000 int labelno = INTVAL (XVECEXP (x, 0, 0));
24002 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24003 assemble_name_raw (fp, label);
24007 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24009 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24013 output_addr_const (fp, XVECEXP (x, 0, 0));
24017 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24019 output_addr_const (fp, XVECEXP (x, 0, 0));
24023 output_addr_const (fp, XVECEXP (x, 0, 1));
24027 else if (GET_CODE (x) == CONST_VECTOR)
24028 return arm_emit_vector_const (fp, x);
24033 /* Output assembly for a shift instruction.
24034 SET_FLAGS determines how the instruction modifies the condition codes.
24035 0 - Do not set condition codes.
24036 1 - Set condition codes.
24037 2 - Use smallest instruction. */
24039 arm_output_shift(rtx * operands, int set_flags)
24042 static const char flag_chars[3] = {'?', '.', '!'};
24047 c = flag_chars[set_flags];
24048 if (TARGET_UNIFIED_ASM)
24050 shift = shift_op(operands[3], &val);
24054 operands[2] = GEN_INT(val);
24055 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24058 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24061 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24062 output_asm_insn (pattern, operands);
24066 /* Output a Thumb-1 casesi dispatch sequence. */
24068 thumb1_output_casesi (rtx *operands)
24070 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24072 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24074 switch (GET_MODE(diff_vec))
24077 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24078 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24080 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24081 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24083 return "bl\t%___gnu_thumb1_case_si";
24085 gcc_unreachable ();
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
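/* Worked example (illustrative): for a byte-offset table the emitted
   sequence is roughly

       cmp  r0, #<ncases - 1>
       bhi  .Ldefault
       tbb  [pc, r0]

   where tbb loads an unsigned byte from the table at pc + r0 and
   branches forward by twice that value.  */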
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    /* The exact set of dual-issue cores varies between GCC releases.  */
    case cortexr4:
    case cortexr4f:
    case cortexa5:
    case cortexa8:
    case cortexa9:
    case fa726te:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;
static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }
  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;
  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
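/* Worked example (illustrative, assuming the usual arm_neon.h typedefs):
   int8x8_t is a V8QImode vector of __builtin_neon_qi elements, so the
   table above maps it to "15__simd64_int8_t", and a C++ function
   "void f (int8x8_t)" mangles as "_Z1f15__simd64_int8_t".  */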
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};
/* Adjust register allocation order when compiling for Thumb.  */

static void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* Legitimize a memory reference for a sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these
   instructions.  */
rtx
arm_legitimize_sync_memory (rtx memory)
{
  rtx addr = force_reg (Pmode, XEXP (memory, 0));
  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);

  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
  return legitimate_memory;
}
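/* For example (illustrative): a reference like [r1, #8] is rewritten so
   that the full address is first computed into a register, e.g.
   "add r2, r1, #8", leaving the plain [r2] form that the ldrex/strex
   patterns here require.  */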
/* An instruction emitter.  */
typedef void (* emit_f) (int label, const char *, rtx *);

/* An instruction emitter that emits via the conventional
   output_asm_insn.  */
static void
arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
{
  output_asm_insn (pattern, operands);
}
/* Count the number of emitted synchronization instructions.  */
static unsigned arm_insn_count;

/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
static void
arm_count (int label,
           const char *pattern ATTRIBUTE_UNUSED,
           rtx *operands ATTRIBUTE_UNUSED)
{
  if (! label)
    ++ arm_insn_count;
}
/* Construct a pattern using conventional output formatting and feed
   it to output_asm_insn.  Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
static void ATTRIBUTE_PRINTF_4
arm_output_asm_insn (emit_f emit, int label, rtx *operands,
                     const char *pattern, ...)
{
  char buffer[256];
  va_list ap;

  va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
}
/* Emit the memory barrier instruction, if any, provided by this
   target to a specified emitter.  */
static void
arm_process_output_memory_barrier (emit_f emit, rtx *operands)
{
  if (TARGET_HAVE_DMB)
    {
      /* Note we issue a system level barrier.  We should consider
         issuing an inner shareability zone barrier here instead,
         i.e. "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }

  if (TARGET_HAVE_DMB_MCR)
    {
      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
      return;
    }

  gcc_unreachable ();
}
/* Emit the memory barrier instruction, if any, provided by this
   target.  */
const char *
arm_output_memory_barrier (rtx *operands)
{
  arm_process_output_memory_barrier (arm_emit, operands);
  return "";
}
/* Helper to figure out the instruction suffix required on ldrex/strex
   for operations on an object of the specified mode.  */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default:
      gcc_unreachable ();
    }
  return "";
}
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_ldrex (emit_f emit,
                  enum machine_mode mode,
                  rtx target,
                  rtx memory)
{
  rtx operands[3];

  operands[0] = target;
  if (mode != DImode)
    {
      const char *suffix = arm_ldrex_suffix (mode);
      operands[1] = memory;
      arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
    }
  else
    {
      /* The restrictions on target registers in ARM mode are that the two
         registers are consecutive and the first one is even; Thumb is
         actually more flexible, but DI should give us this anyway.
         Note that the 1st register always gets the lowest word in memory.  */
      gcc_assert ((REGNO (target) & 1) == 0);
      operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
      operands[2] = memory;
      arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
    }
}
/* Emit a strex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_strex (emit_f emit,
                  enum machine_mode mode,
                  const char *cc,
                  rtx result,
                  rtx value,
                  rtx memory)
{
  rtx operands[4];

  operands[0] = result;
  operands[1] = value;
  if (mode != DImode)
    {
      const char *suffix = arm_ldrex_suffix (mode);
      operands[2] = memory;
      arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
                           suffix, cc);
    }
  else
    {
      /* The restrictions on target registers in ARM mode are that the two
         registers are consecutive and the first one is even; Thumb is
         actually more flexible, but DI should give us this anyway.
         Note that the 1st register always gets the lowest word in memory.  */
      gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
      operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
      operands[3] = memory;
      arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
                           cc);
    }
}
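/* Worked example (illustrative): for a DImode object at [r4], a value in
   the even/odd pair r0/r1 and the status flag in r2, the two helpers
   above emit

       ldrexd r0, r1, [r4]
       strexd r2, r0, r1, [r4]

   where r2 becomes 0 on success and 1 if the exclusive monitor was
   lost.  */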
/* Helper to emit an it instruction in Thumb2 mode only; although the
   assembler will ignore it in ARM mode, emitting it will mess up
   instruction counts we sometimes keep.  'flags' are the extra t's and
   e's if it's more than one instruction that is conditional.  */
static void
arm_output_it (emit_f emit, const char *flags, const char *cond)
{
  rtx operands[1]; /* Don't actually use the operand.  */
  arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
}
/* Helper to emit a two operand instruction.  */
static void
arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
{
  rtx operands[2];

  operands[0] = d;
  operands[1] = s;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
}
/* Helper to emit a three operand instruction.  */
static void
arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
{
  rtx operands[3];

  operands[0] = d;
  operands[1] = a;
  operands[2] = b;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
}
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:

   RTX register representing the required old_value for
   the modify to continue; if NULL, no comparison is performed.  */
static void
arm_output_sync_loop (emit_f emit,
                      enum machine_mode mode,
                      rtx old_value,
                      rtx memory,
                      rtx required_value,
                      rtx new_value,
                      rtx t1,
                      rtx t2,
                      enum attr_sync_op sync_op,
                      int early_barrier_required)
{
  rtx operands[2];
  /* We'll use the lo for the normal rtx in the non-DI case
     as well as the least-sig word in the DI case.  */
  rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
  rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;

  bool is_di = mode == DImode;

  gcc_assert (t1 != t2);

  if (early_barrier_required)
    arm_process_output_memory_barrier (emit, NULL);

  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);

  arm_output_ldrex (emit, mode, old_value, memory);

  if (is_di)
    {
      old_value_lo = gen_lowpart (SImode, old_value);
      old_value_hi = gen_highpart (SImode, old_value);
      if (required_value)
        {
          required_value_lo = gen_lowpart (SImode, required_value);
          required_value_hi = gen_highpart (SImode, required_value);
        }
      else
        {
          /* Silence false potentially unused warning.  */
          required_value_lo = NULL_RTX;
          required_value_hi = NULL_RTX;
        }
      new_value_lo = gen_lowpart (SImode, new_value);
      new_value_hi = gen_highpart (SImode, new_value);
      t1_lo = gen_lowpart (SImode, t1);
      t1_hi = gen_highpart (SImode, t1);
    }
  else
    {
      old_value_lo = old_value;
      new_value_lo = new_value;
      required_value_lo = required_value;
      t1_lo = t1;

      /* Silence false potentially unused warning.  */
      t1_hi = NULL_RTX;
      new_value_hi = NULL_RTX;
      required_value_hi = NULL_RTX;
      old_value_hi = NULL_RTX;
    }
  if (required_value)
    {
      operands[0] = old_value_lo;
      operands[1] = required_value_lo;

      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
      if (is_di)
        {
          arm_output_it (emit, "", "eq");
          arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
        }
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=",
                           LOCAL_LABEL_PREFIX);
    }
  switch (sync_op)
    {
    case SYNC_OP_ADD:
      arm_output_op3 (emit, is_di ? "adds" : "add",
                      t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
      break;

    case SYNC_OP_SUB:
      arm_output_op3 (emit, is_di ? "subs" : "sub",
                      t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
      break;

    case SYNC_OP_IOR:
      arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
      break;

    case SYNC_OP_XOR:
      arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
      break;

    case SYNC_OP_AND:
      arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
      break;

    case SYNC_OP_NAND:
      arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
      if (is_di)
        arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
      arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
      if (is_di)
        arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
      break;

    case SYNC_OP_NONE:
      t1_lo = new_value_lo;
      if (is_di)
        t1_hi = new_value_hi;
      break;
    }
  /* Note that the result of strex is a 0/1 flag that's always in a single
     register.  */
  if (t2)
    {
      arm_output_strex (emit, mode, "", t2, t1, memory);
      operands[0] = t2;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
                           LOCAL_LABEL_PREFIX);
    }
  else
    {
      /* Use old_value for the return value because for some operations
         the old_value can easily be restored.  This saves one register.  */
      arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
      operands[0] = old_value_lo;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
                           LOCAL_LABEL_PREFIX);
      /* Note that we only used the _lo half of old_value as a temporary
         so in DI we don't have to restore the _hi part.  */
      switch (sync_op)
        {
        case SYNC_OP_ADD:
          arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
          break;

        case SYNC_OP_SUB:
          arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
          break;

        case SYNC_OP_XOR:
          arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
          break;

        case SYNC_OP_NONE:
          arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* Note: label is before barrier so that in cmp failure case we still get
     a barrier to stop subsequent loads floating upwards past the ldrex;
     PR target/48126.  */
  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
  arm_process_output_memory_barrier (emit, NULL);
}
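/* Worked example (illustrative): for a SImode atomic add with an early
   barrier and a t2 flag register, the loop printed above is roughly

       dmb    sy
   .LSYT8:
       ldrex  r0, [r2]
       add    r1, r0, r3
       strex  r4, r1, [r2]
       teq    r4, #0
       bne    .LSYT8
   .LSYB8:
       dmb    sy

   The label numbers come from %=, so they differ per insn.  */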
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
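/* For example, FETCH_SYNC_OPERAND(t1, 0) expands to
   arm_get_sync_operand (operands, (int) get_attr_sync_t1 (insn), 0);
   i.e. it fetches the operand selected by the insn's sync_t1 attribute,
   or the supplied default when that attribute is zero.  */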
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND(result, 0);
  memory = FETCH_SYNC_OPERAND(memory, 0);
  required_value = FETCH_SYNC_OPERAND(required_value, 0);
  new_value = FETCH_SYNC_OPERAND(new_value, 0);
  t1 = FETCH_SYNC_OPERAND(t1, 0);
  t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
                        new_value, t1, t2, sync_op, early_barrier);
}
/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions; it just counts them, being careful not to count
   labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
                    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
                 struct arm_sync_generator *generator,
                 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode && mode != DImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
        required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
                                     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    emit_insn (arm_call_generator (generator, target, memory, required_value,
                                   new_value));
}
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
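/* Note (an assumption about the hook's contract): the return value is a
   bitmask of vector sizes in bytes, so 16 | 8 asks the vectorizer to try
   128-bit NEON vectors first and fall back to 64-bit ones, while 0 means
   only the preferred vector size is used.  */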
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the
         access so long as it is not to a member of a packed data
         structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural
         alignment of the vector's element type.  This is probably always
         going to be true in practice, since we've already established
         that this isn't a packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
           regno <= LAST_FPA_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }
  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM;
           regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }
  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
        {
          for (regno = FIRST_FPA_REGNUM;
               regno <= LAST_FPA_REGNUM; ++ regno)
            fixed_regs[regno] = call_used_regs[regno] = 1;
          for (regno = FIRST_CIRRUS_FP_REGNUM;
               regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
            }
        }
      if (TARGET_VFP)
        {
          /* VFPv3 registers are disabled when earlier VFP
             versions are selected due to the definition of
             LAST_VFP_REGNUM.  */
          for (regno = FIRST_VFP_REGNUM;
               regno <= LAST_VFP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
                || regno >= FIRST_VFP_REGNUM + 32;
            }
        }
    }
  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }
  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During the register rename pass, we prefer
     LO_REGS, so code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (! hi_reg)
    return 2;
  return 4;
}
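/* Worked example (illustrative): "push {r4-r7, lr}" involves only low
   registers plus LR, so the 16-bit Thumb-2 encoding applies and the
   length is 2; including a high register such as r8 forces the 32-bit
   encoding and a length of 4.  */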
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;

  output_move_double (operands, false, &count);
  return count;
}

#include "gt-arm.h"